; Codegen test for <2 x half> (f16x2) operations on NVPTX. Each function is
; compiled three ways; checks that differ between configurations use the
; configuration-specific prefix named on the corresponding FileCheck line.
;
; ## Full FP16 support enabled by default.
; RUN: llc < %s -mtriple=nvptx64-nvidia-cuda -mcpu=sm_53 -asm-verbose=false \
; RUN:          -O0 -disable-post-ra -disable-fp-elim -verify-machineinstrs \
; RUN: | FileCheck -allow-deprecated-dag-overlap -check-prefixes CHECK,CHECK-F16 %s
; ## FP16 support explicitly disabled.
; RUN: llc < %s -mtriple=nvptx64-nvidia-cuda -mcpu=sm_53 -asm-verbose=false \
; RUN:          -O0 -disable-post-ra -disable-fp-elim --nvptx-no-f16-math \
; RUN:           -verify-machineinstrs \
; RUN: | FileCheck -allow-deprecated-dag-overlap -check-prefixes CHECK,CHECK-NOF16 %s
; ## FP16 is not supported by hardware.
; RUN: llc < %s -O0 -mtriple=nvptx64-nvidia-cuda -mcpu=sm_52 -asm-verbose=false \
; RUN:          -disable-post-ra -disable-fp-elim -verify-machineinstrs \
; RUN: | FileCheck -allow-deprecated-dag-overlap -check-prefixes CHECK,CHECK-NOF16 %s

target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128"

     17 ; CHECK-LABEL: test_ret_const(
     18 ; CHECK:     mov.u32         [[T:%r[0-9+]]], 1073757184;
     19 ; CHECK:     mov.b32         [[R:%hh[0-9+]]], [[T]];
     20 ; CHECK:     st.param.b32    [func_retval0+0], [[R]];
     21 ; CHECK-NEXT: ret;
     22 define <2 x half> @test_ret_const() #0 {
     23   ret <2 x half> <half 1.0, half 2.0>
     24 }
     25 
     26 ; CHECK-LABEL: test_extract_0(
     27 ; CHECK:      ld.param.b32    [[A:%hh[0-9]+]], [test_extract_0_param_0];
     28 ; CHECK:      mov.b32         {[[R:%h[0-9]+]], %tmp_hi}, [[A]];
     29 ; CHECK:      st.param.b16    [func_retval0+0], [[R]];
     30 ; CHECK:      ret;
     31 define half @test_extract_0(<2 x half> %a) #0 {
     32   %e = extractelement <2 x half> %a, i32 0
     33   ret half %e
     34 }
     35 
     36 ; CHECK-LABEL: test_extract_1(
     37 ; CHECK:      ld.param.b32    [[A:%hh[0-9]+]], [test_extract_1_param_0];
     38 ; CHECK:      mov.b32         {%tmp_lo, [[R:%h[0-9]+]]}, [[A]];
     39 ; CHECK:      st.param.b16    [func_retval0+0], [[R]];
     40 ; CHECK:      ret;
     41 define half @test_extract_1(<2 x half> %a) #0 {
     42   %e = extractelement <2 x half> %a, i32 1
     43   ret half %e
     44 }
     45 
     46 ; CHECK-LABEL: test_extract_i(
     47 ; CHECK-DAG:  ld.param.b32    [[A:%hh[0-9]+]], [test_extract_i_param_0];
     48 ; CHECK-DAG:  ld.param.u64    [[IDX:%rd[0-9]+]], [test_extract_i_param_1];
     49 ; CHECK-DAG:  setp.eq.s64     [[PRED:%p[0-9]+]], [[IDX]], 0;
     50 ; CHECK-DAG:  mov.b32         {[[E0:%h[0-9]+]], [[E1:%h[0-9]+]]}, [[A]];
     51 ; CHECK:      selp.b16        [[R:%h[0-9]+]], [[E0]], [[E1]], [[PRED]];
     52 ; CHECK:      st.param.b16    [func_retval0+0], [[R]];
     53 ; CHECK:      ret;
     54 define half @test_extract_i(<2 x half> %a, i64 %idx) #0 {
     55   %e = extractelement <2 x half> %a, i64 %idx
     56   ret half %e
     57 }
     58 
     59 ; CHECK-LABEL: test_fadd(
     60 ; CHECK-DAG:  ld.param.b32    [[A:%hh[0-9]+]], [test_fadd_param_0];
     61 ; CHECK-DAG:  ld.param.b32    [[B:%hh[0-9]+]], [test_fadd_param_1];
     62 ;
     63 ; CHECK-F16-NEXT:   add.rn.f16x2   [[R:%hh[0-9]+]], [[A]], [[B]];
     64 ;
     65 ; CHECK-NOF16-DAG:  mov.b32        {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[A]]
     66 ; CHECK-NOF16-DAG:  mov.b32        {[[B0:%h[0-9]+]], [[B1:%h[0-9]+]]}, [[B]]
     67 ; CHECK-NOF16-DAG:  cvt.f32.f16    [[FA0:%f[0-9]+]], [[A0]]
     68 ; CHECK-NOF16-DAG:  cvt.f32.f16    [[FB0:%f[0-9]+]], [[B0]]
     69 ; CHECK-NOF16-DAG:  cvt.f32.f16    [[FA1:%f[0-9]+]], [[A1]]
     70 ; CHECK-NOF16-DAG:  cvt.f32.f16    [[FB1:%f[0-9]+]], [[B1]]
     71 ; CHECK-NOF16-DAG:  add.rn.f32     [[FR0:%f[0-9]+]], [[FA0]], [[FB0]];
     72 ; CHECK-NOF16-DAG:  add.rn.f32     [[FR1:%f[0-9]+]], [[FA1]], [[FB1]];
     73 ; CHECK-NOF16-DAG:  cvt.rn.f16.f32 [[R0:%h[0-9]+]], [[FR0]]
     74 ; CHECK-NOF16-DAG:  cvt.rn.f16.f32 [[R1:%h[0-9]+]], [[FR1]]
     75 ; CHECK-NOF16:      mov.b32         [[R:%hh[0-9]+]], {[[R0]], [[R1]]}
     76 ;
     77 ; CHECK-NEXT: st.param.b32    [func_retval0+0], [[R]];
     78 ; CHECK-NEXT: ret;
     79 define <2 x half> @test_fadd(<2 x half> %a, <2 x half> %b) #0 {
     80   %r = fadd <2 x half> %a, %b
     81   ret <2 x half> %r
     82 }
     83 
     84 ; Check that we can lower fadd with immediate arguments.
     85 ; CHECK-LABEL: test_fadd_imm_0(
     86 ; CHECK-DAG:  ld.param.b32    [[A:%hh[0-9]+]], [test_fadd_imm_0_param_0];
     87 ;
     88 ; CHECK-F16:        mov.u32        [[I:%r[0-9+]]], 1073757184;
     89 ; CHECK-F16:        mov.b32        [[IHH:%hh[0-9+]]], [[I]];
     90 ; CHECK-F16:        add.rn.f16x2   [[R:%hh[0-9]+]], [[A]], [[IHH]];
     91 ;
     92 ; CHECK-NOF16-DAG:  mov.b32        {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[A]]
     93 ; CHECK-NOF16-DAG:  cvt.f32.f16    [[FA0:%f[0-9]+]], [[A0]]
     94 ; CHECK-NOF16-DAG:  cvt.f32.f16    [[FA1:%f[0-9]+]], [[A1]]
     95 ; CHECK-NOF16-DAG:  add.rn.f32     [[FR0:%f[0-9]+]], [[FA0]], 0f3F800000;
     96 ; CHECK-NOF16-DAG:  add.rn.f32     [[FR1:%f[0-9]+]], [[FA1]], 0f40000000;
     97 ; CHECK-NOF16-DAG:  cvt.rn.f16.f32 [[R0:%h[0-9]+]], [[FR0]]
     98 ; CHECK-NOF16-DAG:  cvt.rn.f16.f32 [[R1:%h[0-9]+]], [[FR1]]
     99 ; CHECK-NOF16:      mov.b32        [[R:%hh[0-9]+]], {[[R0]], [[R1]]}
    100 ;
    101 ; CHECK-NEXT: st.param.b32    [func_retval0+0], [[R]];
    102 ; CHECK-NEXT: ret;
    103 define <2 x half> @test_fadd_imm_0(<2 x half> %a) #0 {
    104   %r = fadd <2 x half> <half 1.0, half 2.0>, %a
    105   ret <2 x half> %r
    106 }
    107 
    108 ; CHECK-LABEL: test_fadd_imm_1(
    109 ; CHECK-DAG:  ld.param.b32    [[B:%hh[0-9]+]], [test_fadd_imm_1_param_0];
    110 ;
    111 ; CHECK-F16:        mov.u32        [[I:%r[0-9+]]], 1073757184;
    112 ; CHECK-F16:        mov.b32        [[IHH:%hh[0-9+]]], [[I]];
    113 ; CHECK-F16:        add.rn.f16x2   [[R:%hh[0-9]+]], [[B]], [[IHH]];
    114 ;
    115 ; CHECK-NOF16-DAG:  mov.b32        {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[A]]
    116 ; CHECK-NOF16-DAG:  cvt.f32.f16    [[FA0:%f[0-9]+]], [[A0]]
    117 ; CHECK-NOF16-DAG:  cvt.f32.f16    [[FA1:%f[0-9]+]], [[A1]]
    118 ; CHECK-NOF16-DAG:  add.rn.f32     [[FR0:%f[0-9]+]], [[FA0]], 0f3F800000;
    119 ; CHECK-NOF16-DAG:  add.rn.f32     [[FR1:%f[0-9]+]], [[FA1]], 0f40000000;
    120 ; CHECK-NOF16-DAG:  cvt.rn.f16.f32 [[R0:%h[0-9]+]], [[FR0]]
    121 ; CHECK-NOF16-DAG:  cvt.rn.f16.f32 [[R1:%h[0-9]+]], [[FR1]]
    122 ; CHECK-NOF16:      mov.b32        [[R:%hh[0-9]+]], {[[R0]], [[R1]]}
    123 ;
    124 ; CHECK-NEXT: st.param.b32    [func_retval0+0], [[R]];
    125 ; CHECK-NEXT: ret;
    126 define <2 x half> @test_fadd_imm_1(<2 x half> %a) #0 {
    127   %r = fadd <2 x half> %a, <half 1.0, half 2.0>
    128   ret <2 x half> %r
    129 }
    130 
    131 ; CHECK-LABEL: test_fsub(
    132 ; CHECK-DAG:  ld.param.b32    [[A:%hh[0-9]+]], [test_fsub_param_0];
    133 ;
    134 ; CHECK-DAG:  ld.param.b32    [[B:%hh[0-9]+]], [test_fsub_param_1];
    135 ; CHECK-F16-NEXT:   sub.rn.f16x2   [[R:%hh[0-9]+]], [[A]], [[B]];
    136 ;
    137 ; CHECK-NOF16-DAG:  mov.b32        {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[A]]
    138 ; CHECK-NOF16-DAG:  mov.b32        {[[B0:%h[0-9]+]], [[B1:%h[0-9]+]]}, [[B]]
    139 ; CHECK-NOF16-DAG:  cvt.f32.f16    [[FA0:%f[0-9]+]], [[A0]]
    140 ; CHECK-NOF16-DAG:  cvt.f32.f16    [[FB0:%f[0-9]+]], [[B0]]
    141 ; CHECK-NOF16-DAG:  cvt.f32.f16    [[FA1:%f[0-9]+]], [[A1]]
    142 ; CHECK-NOF16-DAG:  cvt.f32.f16    [[FB1:%f[0-9]+]], [[B1]]
    143 ; CHECK-NOF16-DAG:  sub.rn.f32     [[FR0:%f[0-9]+]], [[FA0]], [[FB0]];
    144 ; CHECK-NOF16-DAG:  sub.rn.f32     [[FR1:%f[0-9]+]], [[FA1]], [[FB1]];
    145 ; CHECK-NOF16-DAG:  cvt.rn.f16.f32 [[R0:%h[0-9]+]], [[FR0]]
    146 ; CHECK-NOF16-DAG:  cvt.rn.f16.f32 [[R1:%h[0-9]+]], [[FR1]]
    147 ; CHECK-NOF16:      mov.b32        [[R:%hh[0-9]+]], {[[R0]], [[R1]]}
    148 ;
    149 ; CHECK-NEXT: st.param.b32    [func_retval0+0], [[R]];
    150 ; CHECK-NEXT: ret;
    151 define <2 x half> @test_fsub(<2 x half> %a, <2 x half> %b) #0 {
    152   %r = fsub <2 x half> %a, %b
    153   ret <2 x half> %r
    154 }
    155 
    156 ; CHECK-LABEL: test_fneg(
    157 ; CHECK-DAG:  ld.param.b32    [[A:%hh[0-9]+]], [test_fneg_param_0];
    158 ;
    159 ; CHECK-F16:        mov.u32        [[I0:%r[0-9+]]], 0;
    160 ; CHECK-F16:        mov.b32        [[IHH0:%hh[0-9+]]], [[I0]];
    161 ; CHECK-F16-NEXT:   sub.rn.f16x2   [[R:%hh[0-9]+]], [[IHH0]], [[A]];
    162 ;
    163 ; CHECK-NOF16-DAG:  mov.b32        {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[A]]
    164 ; CHECK-NOF16-DAG:  cvt.f32.f16    [[FA0:%f[0-9]+]], [[A0]]
    165 ; CHECK-NOF16-DAG:  cvt.f32.f16    [[FA1:%f[0-9]+]], [[A1]]
    166 ; CHECK-NOF16-DAG:  mov.f32        [[Z:%f[0-9]+]], 0f00000000;
    167 ; CHECK-NOF16-DAG:  sub.rn.f32     [[FR0:%f[0-9]+]], [[Z]], [[FA0]];
    168 ; CHECK-NOF16-DAG:  sub.rn.f32     [[FR1:%f[0-9]+]], [[Z]], [[FA1]];
    169 ; CHECK-NOF16-DAG:  cvt.rn.f16.f32 [[R0:%h[0-9]+]], [[FR0]]
    170 ; CHECK-NOF16-DAG:  cvt.rn.f16.f32 [[R1:%h[0-9]+]], [[FR1]]
    171 ; CHECK-NOF16:      mov.b32        [[R:%hh[0-9]+]], {[[R0]], [[R1]]}
    172 ;
    173 ; CHECK-NEXT: st.param.b32    [func_retval0+0], [[R]];
    174 ; CHECK-NEXT: ret;
    175 define <2 x half> @test_fneg(<2 x half> %a) #0 {
    176   %r = fsub <2 x half> <half 0.0, half 0.0>, %a
    177   ret <2 x half> %r
    178 }
    179 
    180 ; CHECK-LABEL: test_fmul(
    181 ; CHECK-DAG:  ld.param.b32    [[A:%hh[0-9]+]], [test_fmul_param_0];
    182 ; CHECK-DAG:  ld.param.b32    [[B:%hh[0-9]+]], [test_fmul_param_1];
    183 ; CHECK-F16-NEXT: mul.rn.f16x2     [[R:%hh[0-9]+]], [[A]], [[B]];
    184 ;
    185 ; CHECK-NOF16-DAG:  mov.b32        {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[A]]
    186 ; CHECK-NOF16-DAG:  mov.b32        {[[B0:%h[0-9]+]], [[B1:%h[0-9]+]]}, [[B]]
    187 ; CHECK-NOF16-DAG:  cvt.f32.f16    [[FA0:%f[0-9]+]], [[A0]]
    188 ; CHECK-NOF16-DAG:  cvt.f32.f16    [[FB0:%f[0-9]+]], [[B0]]
    189 ; CHECK-NOF16-DAG:  cvt.f32.f16    [[FA1:%f[0-9]+]], [[A1]]
    190 ; CHECK-NOF16-DAG:  cvt.f32.f16    [[FB1:%f[0-9]+]], [[B1]]
    191 ; CHECK-NOF16-DAG:  mul.rn.f32     [[FR0:%f[0-9]+]], [[FA0]], [[FB0]];
    192 ; CHECK-NOF16-DAG:  mul.rn.f32     [[FR1:%f[0-9]+]], [[FA1]], [[FB1]];
    193 ; CHECK-NOF16-DAG:  cvt.rn.f16.f32 [[R0:%h[0-9]+]], [[FR0]]
    194 ; CHECK-NOF16-DAG:  cvt.rn.f16.f32 [[R1:%h[0-9]+]], [[FR1]]
    195 ; CHECK-NOF16:      mov.b32         [[R:%hh[0-9]+]], {[[R0]], [[R1]]}
    196 ;
    197 ; CHECK-NEXT: st.param.b32    [func_retval0+0], [[R]];
    198 ; CHECK-NEXT: ret;
    199 define <2 x half> @test_fmul(<2 x half> %a, <2 x half> %b) #0 {
    200   %r = fmul <2 x half> %a, %b
    201   ret <2 x half> %r
    202 }
    203 
    204 ; CHECK-LABEL: test_fdiv(
    205 ; CHECK-DAG:  ld.param.b32    [[A:%hh[0-9]+]], [test_fdiv_param_0];
    206 ; CHECK-DAG:  ld.param.b32    [[B:%hh[0-9]+]], [test_fdiv_param_1];
    207 ; CHECK-DAG:  mov.b32         {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[A]]
    208 ; CHECK-DAG:  mov.b32         {[[B0:%h[0-9]+]], [[B1:%h[0-9]+]]}, [[B]]
    209 ; CHECK-DAG:  cvt.f32.f16     [[FA0:%f[0-9]+]], [[A0]];
    210 ; CHECK-DAG:  cvt.f32.f16     [[FA1:%f[0-9]+]], [[A1]];
    211 ; CHECK-DAG:  cvt.f32.f16     [[FB0:%f[0-9]+]], [[B0]];
    212 ; CHECK-DAG:  cvt.f32.f16     [[FB1:%f[0-9]+]], [[B1]];
    213 ; CHECK-DAG:  div.rn.f32      [[FR0:%f[0-9]+]], [[FA0]], [[FB0]];
    214 ; CHECK-DAG:  div.rn.f32      [[FR1:%f[0-9]+]], [[FA1]], [[FB1]];
    215 ; CHECK-DAG:  cvt.rn.f16.f32  [[R0:%h[0-9]+]], [[FR0]];
    216 ; CHECK-DAG:  cvt.rn.f16.f32  [[R1:%h[0-9]+]], [[FR1]];
    217 ; CHECK-NEXT: mov.b32         [[R:%hh[0-9]+]], {[[R0]], [[R1]]}
    218 ; CHECK-NEXT: st.param.b32    [func_retval0+0], [[R]];
    219 ; CHECK-NEXT: ret;
    220 define <2 x half> @test_fdiv(<2 x half> %a, <2 x half> %b) #0 {
    221   %r = fdiv <2 x half> %a, %b
    222   ret <2 x half> %r
    223 }
    224 
    225 ; CHECK-LABEL: test_frem(
    226 ; -- Load two 16x2 inputs and split them into f16 elements
    227 ; CHECK-DAG:  ld.param.b32    [[A:%hh[0-9]+]], [test_frem_param_0];
    228 ; CHECK-DAG:  ld.param.b32    [[B:%hh[0-9]+]], [test_frem_param_1];
    229 ; -- Split into elements
    230 ; CHECK-DAG:  mov.b32         {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[A]]
    231 ; CHECK-DAG:  mov.b32         {[[B0:%h[0-9]+]], [[B1:%h[0-9]+]]}, [[B]]
    232 ; -- promote to f32.
    233 ; CHECK-DAG:  cvt.f32.f16     [[FA0:%f[0-9]+]], [[A0]];
    234 ; CHECK-DAG:  cvt.f32.f16     [[FB0:%f[0-9]+]], [[B0]];
    235 ; CHECK-DAG:  cvt.f32.f16     [[FA1:%f[0-9]+]], [[A1]];
    236 ; CHECK-DAG:  cvt.f32.f16     [[FB1:%f[0-9]+]], [[B1]];
    237 ; -- frem(a[0],b[0]).
    238 ; CHECK-DAG:  div.rn.f32      [[FD0:%f[0-9]+]], [[FA0]], [[FB0]];
    239 ; CHECK-DAG:  cvt.rmi.f32.f32 [[DI0:%f[0-9]+]], [[FD0]];
    240 ; CHECK-DAG:  mul.f32         [[RI0:%f[0-9]+]], [[DI0]], [[FB0]];
    241 ; CHECK-DAG:  sub.f32         [[RF0:%f[0-9]+]], [[FA0]], [[RI0]];
    242 ; -- frem(a[1],b[1]).
    243 ; CHECK-DAG:  div.rn.f32      [[FD1:%f[0-9]+]], [[FA1]], [[FB1]];
    244 ; CHECK-DAG:  cvt.rmi.f32.f32 [[DI1:%f[0-9]+]], [[FD1]];
    245 ; CHECK-DAG:  mul.f32         [[RI1:%f[0-9]+]], [[DI1]], [[FB1]];
    246 ; CHECK-DAG:  sub.f32         [[RF1:%f[0-9]+]], [[FA1]], [[RI1]];
    247 ; -- convert back to f16.
    248 ; CHECK-DAG:  cvt.rn.f16.f32  [[R0:%h[0-9]+]], [[RF0]];
    249 ; CHECK-DAG:  cvt.rn.f16.f32  [[R1:%h[0-9]+]], [[RF1]];
    250 ; -- merge into f16x2 and return it.
    251 ; CHECK:      mov.b32         [[R:%hh[0-9]+]], {[[R0]], [[R1]]}
    252 ; CHECK-NEXT: st.param.b32    [func_retval0+0], [[R]];
    253 ; CHECK-NEXT: ret;
    254 define <2 x half> @test_frem(<2 x half> %a, <2 x half> %b) #0 {
    255   %r = frem <2 x half> %a, %b
    256   ret <2 x half> %r
    257 }
    258 
    259 ; CHECK-LABEL: .func test_ldst_v2f16(
    260 ; CHECK-DAG:    ld.param.u64    %[[A:rd[0-9]+]], [test_ldst_v2f16_param_0];
    261 ; CHECK-DAG:    ld.param.u64    %[[B:rd[0-9]+]], [test_ldst_v2f16_param_1];
    262 ; CHECK-DAG:    ld.b32          [[E:%hh[0-9]+]], [%[[A]]]
    263 ; CHECK:        mov.b32         {[[E0:%h[0-9]+]], [[E1:%h[0-9]+]]}, [[E]];
    264 ; CHECK-DAG:    st.v2.b16       [%[[B]]], {[[E0]], [[E1]]};
    265 ; CHECK:        ret;
    266 define void @test_ldst_v2f16(<2 x half>* %a, <2 x half>* %b) {
    267   %t1 = load <2 x half>, <2 x half>* %a
    268   store <2 x half> %t1, <2 x half>* %b, align 16
    269   ret void
    270 }
    271 
    272 ; CHECK-LABEL: .func test_ldst_v3f16(
    273 ; CHECK-DAG:    ld.param.u64    %[[A:rd[0-9]+]], [test_ldst_v3f16_param_0];
    274 ; CHECK-DAG:    ld.param.u64    %[[B:rd[0-9]+]], [test_ldst_v3f16_param_1];
    275 ; -- v3 is inconvenient to capture as it's lowered as ld.b64 + fair
    276 ;    number of bitshifting instructions that may change at llvm's whim.
    277 ;    So we only verify that we only issue correct number of writes using
    278 ;    correct offset, but not the values we write.
    279 ; CHECK-DAG:    ld.u64
    280 ; CHECK-DAG:    st.u32          [%[[B]]],
    281 ; CHECK-DAG:    st.b16          [%[[B]]+4],
    282 ; CHECK:        ret;
    283 define void @test_ldst_v3f16(<3 x half>* %a, <3 x half>* %b) {
    284   %t1 = load <3 x half>, <3 x half>* %a
    285   store <3 x half> %t1, <3 x half>* %b, align 16
    286   ret void
    287 }
    288 
    289 ; CHECK-LABEL: .func test_ldst_v4f16(
    290 ; CHECK-DAG:    ld.param.u64    %[[A:rd[0-9]+]], [test_ldst_v4f16_param_0];
    291 ; CHECK-DAG:    ld.param.u64    %[[B:rd[0-9]+]], [test_ldst_v4f16_param_1];
    292 ; CHECK-DAG:    ld.v4.b16       {[[E0:%h[0-9]+]], [[E1:%h[0-9]+]], [[E2:%h[0-9]+]], [[E3:%h[0-9]+]]}, [%[[A]]];
    293 ; CHECK-DAG:    st.v4.b16       [%[[B]]], {[[E0]], [[E1]], [[E2]], [[E3]]};
    294 ; CHECK:        ret;
    295 define void @test_ldst_v4f16(<4 x half>* %a, <4 x half>* %b) {
    296   %t1 = load <4 x half>, <4 x half>* %a
    297   store <4 x half> %t1, <4 x half>* %b, align 16
    298   ret void
    299 }
    300 
    301 ; CHECK-LABEL: .func test_ldst_v8f16(
    302 ; CHECK-DAG:    ld.param.u64    %[[A:rd[0-9]+]], [test_ldst_v8f16_param_0];
    303 ; CHECK-DAG:    ld.param.u64    %[[B:rd[0-9]+]], [test_ldst_v8f16_param_1];
    304 ; CHECK-DAG:    ld.v4.b32       {[[E0:%r[0-9]+]], [[E1:%r[0-9]+]], [[E2:%r[0-9]+]], [[E3:%r[0-9]+]]}, [%[[A]]];
    305 ; CHECK-DAG:    st.v4.b32       [%[[B]]], {[[E0]], [[E1]], [[E2]], [[E3]]};
    306 ; CHECK:        ret;
    307 define void @test_ldst_v8f16(<8 x half>* %a, <8 x half>* %b) {
    308   %t1 = load <8 x half>, <8 x half>* %a
    309   store <8 x half> %t1, <8 x half>* %b, align 16
    310   ret void
    311 }
    312 
    313 declare <2 x half> @test_callee(<2 x half> %a, <2 x half> %b) #0
    314 
    315 ; CHECK-LABEL: test_call(
    316 ; CHECK-DAG:  ld.param.b32    [[A:%hh[0-9]+]], [test_call_param_0];
    317 ; CHECK-DAG:  ld.param.b32    [[B:%hh[0-9]+]], [test_call_param_1];
    318 ; CHECK:      {
    319 ; CHECK-DAG:  .param .align 4 .b8 param0[4];
    320 ; CHECK-DAG:  .param .align 4 .b8 param1[4];
    321 ; CHECK-DAG:  st.param.b32    [param0+0], [[A]];
    322 ; CHECK-DAG:  st.param.b32    [param1+0], [[B]];
    323 ; CHECK-DAG:  .param .align 4 .b8 retval0[4];
    324 ; CHECK:      call.uni (retval0),
    325 ; CHECK-NEXT:        test_callee,
    326 ; CHECK:      );
    327 ; CHECK-NEXT: ld.param.b32    [[R:%hh[0-9]+]], [retval0+0];
    328 ; CHECK-NEXT: }
    329 ; CHECK-NEXT: st.param.b32    [func_retval0+0], [[R]];
    330 ; CHECK-NEXT: ret;
    331 define <2 x half> @test_call(<2 x half> %a, <2 x half> %b) #0 {
    332   %r = call <2 x half> @test_callee(<2 x half> %a, <2 x half> %b)
    333   ret <2 x half> %r
    334 }
    335 
    336 ; CHECK-LABEL: test_call_flipped(
    337 ; CHECK-DAG:  ld.param.b32    [[A:%hh[0-9]+]], [test_call_flipped_param_0];
    338 ; CHECK-DAG:  ld.param.b32    [[B:%hh[0-9]+]], [test_call_flipped_param_1];
    339 ; CHECK:      {
    340 ; CHECK-DAG:  .param .align 4 .b8 param0[4];
    341 ; CHECK-DAG:  .param .align 4 .b8 param1[4];
    342 ; CHECK-DAG:  st.param.b32    [param0+0], [[B]];
    343 ; CHECK-DAG:  st.param.b32    [param1+0], [[A]];
    344 ; CHECK-DAG:  .param .align 4 .b8 retval0[4];
    345 ; CHECK:      call.uni (retval0),
    346 ; CHECK-NEXT:        test_callee,
    347 ; CHECK:      );
    348 ; CHECK-NEXT: ld.param.b32    [[R:%hh[0-9]+]], [retval0+0];
    349 ; CHECK-NEXT: }
    350 ; CHECK-NEXT: st.param.b32    [func_retval0+0], [[R]];
    351 ; CHECK-NEXT: ret;
    352 define <2 x half> @test_call_flipped(<2 x half> %a, <2 x half> %b) #0 {
    353   %r = call <2 x half> @test_callee(<2 x half> %b, <2 x half> %a)
    354   ret <2 x half> %r
    355 }
    356 
    357 ; CHECK-LABEL: test_tailcall_flipped(
    358 ; CHECK-DAG:  ld.param.b32    [[A:%hh[0-9]+]], [test_tailcall_flipped_param_0];
    359 ; CHECK-DAG:  ld.param.b32    [[B:%hh[0-9]+]], [test_tailcall_flipped_param_1];
    360 ; CHECK:      {
    361 ; CHECK-DAG:  .param .align 4 .b8 param0[4];
    362 ; CHECK-DAG:  .param .align 4 .b8 param1[4];
    363 ; CHECK-DAG:  st.param.b32    [param0+0], [[B]];
    364 ; CHECK-DAG:  st.param.b32    [param1+0], [[A]];
    365 ; CHECK-DAG:  .param .align 4 .b8 retval0[4];
    366 ; CHECK:      call.uni (retval0),
    367 ; CHECK-NEXT:        test_callee,
    368 ; CHECK:      );
    369 ; CHECK-NEXT: ld.param.b32    [[R:%hh[0-9]+]], [retval0+0];
    370 ; CHECK-NEXT: }
    371 ; CHECK-NEXT: st.param.b32    [func_retval0+0], [[R]];
    372 ; CHECK-NEXT: ret;
    373 define <2 x half> @test_tailcall_flipped(<2 x half> %a, <2 x half> %b) #0 {
    374   %r = tail call <2 x half> @test_callee(<2 x half> %b, <2 x half> %a)
    375   ret <2 x half> %r
    376 }
    377 
    378 ; CHECK-LABEL: test_select(
    379 ; CHECK-DAG:  ld.param.b32    [[A:%hh[0-9]+]], [test_select_param_0];
    380 ; CHECK-DAG:  ld.param.b32    [[B:%hh[0-9]+]], [test_select_param_1];
    381 ; CHECK-DAG:  ld.param.u8     [[C:%rs[0-9]+]], [test_select_param_2]
    382 ; CHECK-DAG:  setp.eq.b16     [[PRED:%p[0-9]+]], %rs{{.*}}, 1;
    383 ; CHECK-NEXT: selp.b32        [[R:%hh[0-9]+]], [[A]], [[B]], [[PRED]];
    384 ; CHECK-NEXT: st.param.b32    [func_retval0+0], [[R]];
    385 ; CHECK-NEXT: ret;
    386 define <2 x half> @test_select(<2 x half> %a, <2 x half> %b, i1 zeroext %c) #0 {
    387   %r = select i1 %c, <2 x half> %a, <2 x half> %b
    388   ret <2 x half> %r
    389 }
    390 
    391 ; CHECK-LABEL: test_select_cc(
    392 ; CHECK-DAG:  ld.param.b32    [[A:%hh[0-9]+]], [test_select_cc_param_0];
    393 ; CHECK-DAG:  ld.param.b32    [[B:%hh[0-9]+]], [test_select_cc_param_1];
    394 ; CHECK-DAG:  ld.param.b32    [[C:%hh[0-9]+]], [test_select_cc_param_2];
    395 ; CHECK-DAG:  ld.param.b32    [[D:%hh[0-9]+]], [test_select_cc_param_3];
    396 ;
    397 ; CHECK-F16:  setp.neu.f16x2  [[P0:%p[0-9]+]]|[[P1:%p[0-9]+]], [[C]], [[D]]
    398 ;
    399 ; CHECK-NOF16-DAG: mov.b32        {[[C0:%h[0-9]+]], [[C1:%h[0-9]+]]}, [[C]]
    400 ; CHECK-NOF16-DAG: mov.b32        {[[D0:%h[0-9]+]], [[D1:%h[0-9]+]]}, [[D]]
    401 ; CHECK-NOF16-DAG: cvt.f32.f16 [[DF0:%f[0-9]+]], [[D0]];
    402 ; CHECK-NOF16-DAG: cvt.f32.f16 [[CF0:%f[0-9]+]], [[C0]];
    403 ; CHECK-NOF16-DAG: cvt.f32.f16 [[DF1:%f[0-9]+]], [[D1]];
    404 ; CHECK-NOF16-DAG: cvt.f32.f16 [[CF1:%f[0-9]+]], [[C1]];
    405 ; CHECK-NOF16-DAG: setp.neu.f32    [[P0:%p[0-9]+]], [[CF0]], [[DF0]]
    406 ; CHECK-NOF16-DAG: setp.neu.f32    [[P1:%p[0-9]+]], [[CF1]], [[DF1]]
    407 ;
    408 ; CHECK-DAG:  mov.b32         {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[A]]
    409 ; CHECK-DAG:  mov.b32         {[[B0:%h[0-9]+]], [[B1:%h[0-9]+]]}, [[B]]
    410 ; CHECK-DAG:  selp.b16        [[R0:%h[0-9]+]], [[A0]], [[B0]], [[P0]];
    411 ; CHECK-DAG:  selp.b16        [[R1:%h[0-9]+]], [[A1]], [[B1]], [[P1]];
    412 ; CHECK:      mov.b32         [[R:%hh[0-9]+]], {[[R0]], [[R1]]}
    413 ; CHECK-NEXT: st.param.b32    [func_retval0+0], [[R]];
    414 ; CHECK-NEXT: ret;
    415 define <2 x half> @test_select_cc(<2 x half> %a, <2 x half> %b, <2 x half> %c, <2 x half> %d) #0 {
    416   %cc = fcmp une <2 x half> %c, %d
    417   %r = select <2 x i1> %cc, <2 x half> %a, <2 x half> %b
    418   ret <2 x half> %r
    419 }
    420 
    421 ; CHECK-LABEL: test_select_cc_f32_f16(
    422 ; CHECK-DAG:  ld.param.v2.f32    {[[A0:%f[0-9]+]], [[A1:%f[0-9]+]]}, [test_select_cc_f32_f16_param_0];
    423 ; CHECK-DAG:  ld.param.v2.f32    {[[B0:%f[0-9]+]], [[B1:%f[0-9]+]]}, [test_select_cc_f32_f16_param_1];
    424 ; CHECK-DAG:  ld.param.b32    [[C:%hh[0-9]+]], [test_select_cc_f32_f16_param_2];
    425 ; CHECK-DAG:  ld.param.b32    [[D:%hh[0-9]+]], [test_select_cc_f32_f16_param_3];
    426 ;
    427 ; CHECK-F16:  setp.neu.f16x2  [[P0:%p[0-9]+]]|[[P1:%p[0-9]+]], [[C]], [[D]]
    428 ; CHECK-NOF16-DAG: mov.b32         {[[C0:%h[0-9]+]], [[C1:%h[0-9]+]]}, [[C]]
    429 ; CHECK-NOF16-DAG: mov.b32         {[[D0:%h[0-9]+]], [[D1:%h[0-9]+]]}, [[D]]
    430 ; CHECK-NOF16-DAG: cvt.f32.f16 [[DF0:%f[0-9]+]], [[D0]];
    431 ; CHECK-NOF16-DAG: cvt.f32.f16 [[CF0:%f[0-9]+]], [[C0]];
    432 ; CHECK-NOF16-DAG: cvt.f32.f16 [[DF1:%f[0-9]+]], [[D1]];
    433 ; CHECK-NOF16-DAG: cvt.f32.f16 [[CF1:%f[0-9]+]], [[C1]];
    434 ; CHECK-NOF16-DAG: setp.neu.f32    [[P0:%p[0-9]+]], [[CF0]], [[DF0]]
    435 ; CHECK-NOF16-DAG: setp.neu.f32    [[P1:%p[0-9]+]], [[CF1]], [[DF1]]
    436 ;
    437 ; CHECK-DAG: selp.f32        [[R0:%f[0-9]+]], [[A0]], [[B0]], [[P0]];
    438 ; CHECK-DAG: selp.f32        [[R1:%f[0-9]+]], [[A1]], [[B1]], [[P1]];
    439 ; CHECK-NEXT: st.param.v2.f32    [func_retval0+0], {[[R0]], [[R1]]};
    440 ; CHECK-NEXT: ret;
    441 define <2 x float> @test_select_cc_f32_f16(<2 x float> %a, <2 x float> %b,
    442                                            <2 x half> %c, <2 x half> %d) #0 {
    443   %cc = fcmp une <2 x half> %c, %d
    444   %r = select <2 x i1> %cc, <2 x float> %a, <2 x float> %b
    445   ret <2 x float> %r
    446 }
    447 
    448 ; CHECK-LABEL: test_select_cc_f16_f32(
    449 ; CHECK-DAG:  ld.param.b32    [[A:%hh[0-9]+]], [test_select_cc_f16_f32_param_0];
    450 ; CHECK-DAG:  ld.param.b32    [[B:%hh[0-9]+]], [test_select_cc_f16_f32_param_1];
    451 ; CHECK-DAG:  ld.param.v2.f32 {[[C0:%f[0-9]+]], [[C1:%f[0-9]+]]}, [test_select_cc_f16_f32_param_2];
    452 ; CHECK-DAG:  ld.param.v2.f32 {[[D0:%f[0-9]+]], [[D1:%f[0-9]+]]}, [test_select_cc_f16_f32_param_3];
    453 ; CHECK-DAG:  setp.neu.f32    [[P0:%p[0-9]+]], [[C0]], [[D0]]
    454 ; CHECK-DAG:  setp.neu.f32    [[P1:%p[0-9]+]], [[C1]], [[D1]]
    455 ; CHECK-DAG:  mov.b32         {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[A]]
    456 ; CHECK-DAG:  mov.b32         {[[B0:%h[0-9]+]], [[B1:%h[0-9]+]]}, [[B]]
    457 ; CHECK-DAG:  selp.b16        [[R0:%h[0-9]+]], [[A0]], [[B0]], [[P0]];
    458 ; CHECK-DAG:  selp.b16        [[R1:%h[0-9]+]], [[A1]], [[B1]], [[P1]];
    459 ; CHECK:      mov.b32         [[R:%hh[0-9]+]], {[[R0]], [[R1]]}
    460 ; CHECK-NEXT: st.param.b32    [func_retval0+0], [[R]];
    461 ; CHECK-NEXT: ret;
    462 define <2 x half> @test_select_cc_f16_f32(<2 x half> %a, <2 x half> %b,
    463                                           <2 x float> %c, <2 x float> %d) #0 {
    464   %cc = fcmp une <2 x float> %c, %d
    465   %r = select <2 x i1> %cc, <2 x half> %a, <2 x half> %b
    466   ret <2 x half> %r
    467 }
    468 
    469 ; CHECK-LABEL: test_fcmp_une(
    470 ; CHECK-DAG:  ld.param.b32    [[A:%hh[0-9]+]], [test_fcmp_une_param_0];
    471 ; CHECK-DAG:  ld.param.b32    [[B:%hh[0-9]+]], [test_fcmp_une_param_1];
    472 ; CHECK-F16:  setp.neu.f16x2  [[P0:%p[0-9]+]]|[[P1:%p[0-9]+]], [[A]], [[B]]
    473 ; CHECK-NOF16-DAG:  mov.b32        {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[A]]
    474 ; CHECK-NOF16-DAG:  mov.b32        {[[B0:%h[0-9]+]], [[B1:%h[0-9]+]]}, [[B]]
    475 ; CHECK-NOF16-DAG:  cvt.f32.f16    [[FA0:%f[0-9]+]], [[A0]]
    476 ; CHECK-NOF16-DAG:  cvt.f32.f16    [[FB0:%f[0-9]+]], [[B0]]
    477 ; CHECK-NOF16-DAG:  cvt.f32.f16    [[FA1:%f[0-9]+]], [[A1]]
    478 ; CHECK-NOF16-DAG:  cvt.f32.f16    [[FB1:%f[0-9]+]], [[B1]]
    479 ; CHECK-NOF16-DAG:  setp.neu.f32   [[P0:%p[0-9]+]], [[FA0]], [[FB0]]
    480 ; CHECK-NOF16-DAG:  setp.neu.f32   [[P1:%p[0-9]+]], [[FA1]], [[FB1]]
    481 ; CHECK-DAG:  selp.u16        [[R0:%rs[0-9]+]], -1, 0, [[P0]];
    482 ; CHECK-DAG:  selp.u16        [[R1:%rs[0-9]+]], -1, 0, [[P1]];
    483 ; CHECK-NEXT: st.param.v2.b8  [func_retval0+0], {[[R0]], [[R1]]};
    484 ; CHECK-NEXT: ret;
    485 define <2 x i1> @test_fcmp_une(<2 x half> %a, <2 x half> %b) #0 {
    486   %r = fcmp une <2 x half> %a, %b
    487   ret <2 x i1> %r
    488 }
    489 
    490 ; CHECK-LABEL: test_fcmp_ueq(
    491 ; CHECK-DAG:  ld.param.b32    [[A:%hh[0-9]+]], [test_fcmp_ueq_param_0];
    492 ; CHECK-DAG:  ld.param.b32    [[B:%hh[0-9]+]], [test_fcmp_ueq_param_1];
    493 ; CHECK-F16:  setp.equ.f16x2  [[P0:%p[0-9]+]]|[[P1:%p[0-9]+]], [[A]], [[B]]
    494 ; CHECK-NOF16-DAG:  mov.b32        {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[A]]
    495 ; CHECK-NOF16-DAG:  mov.b32        {[[B0:%h[0-9]+]], [[B1:%h[0-9]+]]}, [[B]]
    496 ; CHECK-NOF16-DAG:  cvt.f32.f16    [[FA0:%f[0-9]+]], [[A0]]
    497 ; CHECK-NOF16-DAG:  cvt.f32.f16    [[FB0:%f[0-9]+]], [[B0]]
    498 ; CHECK-NOF16-DAG:  cvt.f32.f16    [[FA1:%f[0-9]+]], [[A1]]
    499 ; CHECK-NOF16-DAG:  cvt.f32.f16    [[FB1:%f[0-9]+]], [[B1]]
    500 ; CHECK-NOF16-DAG:  setp.equ.f32   [[P0:%p[0-9]+]], [[FA0]], [[FB0]]
    501 ; CHECK-NOF16-DAG:  setp.equ.f32   [[P1:%p[0-9]+]], [[FA1]], [[FB1]]
    502 ; CHECK-DAG:  selp.u16        [[R0:%rs[0-9]+]], -1, 0, [[P0]];
    503 ; CHECK-DAG:  selp.u16        [[R1:%rs[0-9]+]], -1, 0, [[P1]];
    504 ; CHECK-NEXT: st.param.v2.b8  [func_retval0+0], {[[R0]], [[R1]]};
    505 ; CHECK-NEXT: ret;
    506 define <2 x i1> @test_fcmp_ueq(<2 x half> %a, <2 x half> %b) #0 {
    507   %r = fcmp ueq <2 x half> %a, %b
    508   ret <2 x i1> %r
    509 }
    510 
    511 ; CHECK-LABEL: test_fcmp_ugt(
    512 ; CHECK-DAG:  ld.param.b32    [[A:%hh[0-9]+]], [test_fcmp_ugt_param_0];
    513 ; CHECK-DAG:  ld.param.b32    [[B:%hh[0-9]+]], [test_fcmp_ugt_param_1];
    514 ; CHECK-F16:  setp.gtu.f16x2  [[P0:%p[0-9]+]]|[[P1:%p[0-9]+]], [[A]], [[B]]
    515 ; CHECK-NOF16-DAG:  mov.b32        {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[A]]
    516 ; CHECK-NOF16-DAG:  mov.b32        {[[B0:%h[0-9]+]], [[B1:%h[0-9]+]]}, [[B]]
    517 ; CHECK-NOF16-DAG:  cvt.f32.f16    [[FA0:%f[0-9]+]], [[A0]]
    518 ; CHECK-NOF16-DAG:  cvt.f32.f16    [[FB0:%f[0-9]+]], [[B0]]
    519 ; CHECK-NOF16-DAG:  cvt.f32.f16    [[FA1:%f[0-9]+]], [[A1]]
    520 ; CHECK-NOF16-DAG:  cvt.f32.f16    [[FB1:%f[0-9]+]], [[B1]]
    521 ; CHECK-NOF16-DAG:  setp.gtu.f32   [[P0:%p[0-9]+]], [[FA0]], [[FB0]]
    522 ; CHECK-NOF16-DAG:  setp.gtu.f32   [[P1:%p[0-9]+]], [[FA1]], [[FB1]]
    523 ; CHECK-DAG:  selp.u16        [[R0:%rs[0-9]+]], -1, 0, [[P0]];
    524 ; CHECK-DAG:  selp.u16        [[R1:%rs[0-9]+]], -1, 0, [[P1]];
    525 ; CHECK-NEXT: st.param.v2.b8  [func_retval0+0], {[[R0]], [[R1]]};
    526 ; CHECK-NEXT: ret;
    527 define <2 x i1> @test_fcmp_ugt(<2 x half> %a, <2 x half> %b) #0 {
    528   %r = fcmp ugt <2 x half> %a, %b
    529   ret <2 x i1> %r
    530 }
    531 
    532 ; CHECK-LABEL: test_fcmp_uge(
    533 ; CHECK-DAG:  ld.param.b32    [[A:%hh[0-9]+]], [test_fcmp_uge_param_0];
    534 ; CHECK-DAG:  ld.param.b32    [[B:%hh[0-9]+]], [test_fcmp_uge_param_1];
    535 ; CHECK-F16:  setp.geu.f16x2  [[P0:%p[0-9]+]]|[[P1:%p[0-9]+]], [[A]], [[B]]
    536 ; CHECK-NOF16-DAG:  mov.b32        {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[A]]
    537 ; CHECK-NOF16-DAG:  mov.b32        {[[B0:%h[0-9]+]], [[B1:%h[0-9]+]]}, [[B]]
    538 ; CHECK-NOF16-DAG:  cvt.f32.f16    [[FA0:%f[0-9]+]], [[A0]]
    539 ; CHECK-NOF16-DAG:  cvt.f32.f16    [[FB0:%f[0-9]+]], [[B0]]
    540 ; CHECK-NOF16-DAG:  cvt.f32.f16    [[FA1:%f[0-9]+]], [[A1]]
    541 ; CHECK-NOF16-DAG:  cvt.f32.f16    [[FB1:%f[0-9]+]], [[B1]]
    542 ; CHECK-NOF16-DAG:  setp.geu.f32   [[P0:%p[0-9]+]], [[FA0]], [[FB0]]
    543 ; CHECK-NOF16-DAG:  setp.geu.f32   [[P1:%p[0-9]+]], [[FA1]], [[FB1]]
    544 ; CHECK-DAG:  selp.u16        [[R0:%rs[0-9]+]], -1, 0, [[P0]];
    545 ; CHECK-DAG:  selp.u16        [[R1:%rs[0-9]+]], -1, 0, [[P1]];
    546 ; CHECK-NEXT: st.param.v2.b8  [func_retval0+0], {[[R0]], [[R1]]};
    547 ; CHECK-NEXT: ret;
    548 define <2 x i1> @test_fcmp_uge(<2 x half> %a, <2 x half> %b) #0 {
    549   %r = fcmp uge <2 x half> %a, %b
    550   ret <2 x i1> %r
    551 }
    552 
    553 ; CHECK-LABEL: test_fcmp_ult(
    554 ; CHECK-DAG:  ld.param.b32    [[A:%hh[0-9]+]], [test_fcmp_ult_param_0];
    555 ; CHECK-DAG:  ld.param.b32    [[B:%hh[0-9]+]], [test_fcmp_ult_param_1];
    556 ; CHECK-F16:  setp.ltu.f16x2  [[P0:%p[0-9]+]]|[[P1:%p[0-9]+]], [[A]], [[B]]
    557 ; CHECK-NOF16-DAG:  mov.b32        {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[A]]
    558 ; CHECK-NOF16-DAG:  mov.b32        {[[B0:%h[0-9]+]], [[B1:%h[0-9]+]]}, [[B]]
    559 ; CHECK-NOF16-DAG:  cvt.f32.f16    [[FA0:%f[0-9]+]], [[A0]]
    560 ; CHECK-NOF16-DAG:  cvt.f32.f16    [[FB0:%f[0-9]+]], [[B0]]
    561 ; CHECK-NOF16-DAG:  cvt.f32.f16    [[FA1:%f[0-9]+]], [[A1]]
    562 ; CHECK-NOF16-DAG:  cvt.f32.f16    [[FB1:%f[0-9]+]], [[B1]]
    563 ; CHECK-NOF16-DAG:  setp.ltu.f32   [[P0:%p[0-9]+]], [[FA0]], [[FB0]]
    564 ; CHECK-NOF16-DAG:  setp.ltu.f32   [[P1:%p[0-9]+]], [[FA1]], [[FB1]]
    565 ; CHECK-DAG:  selp.u16        [[R0:%rs[0-9]+]], -1, 0, [[P0]];
    566 ; CHECK-DAG:  selp.u16        [[R1:%rs[0-9]+]], -1, 0, [[P1]];
    567 ; CHECK-NEXT: st.param.v2.b8  [func_retval0+0], {[[R0]], [[R1]]};
    568 ; CHECK-NEXT: ret;
    569 define <2 x i1> @test_fcmp_ult(<2 x half> %a, <2 x half> %b) #0 {
    570   %r = fcmp ult <2 x half> %a, %b
    571   ret <2 x i1> %r
    572 }
    573 
    574 ; CHECK-LABEL: test_fcmp_ule(
    575 ; CHECK-DAG:  ld.param.b32    [[A:%hh[0-9]+]], [test_fcmp_ule_param_0];
    576 ; CHECK-DAG:  ld.param.b32    [[B:%hh[0-9]+]], [test_fcmp_ule_param_1];
    577 ; CHECK-F16:  setp.leu.f16x2  [[P0:%p[0-9]+]]|[[P1:%p[0-9]+]], [[A]], [[B]]
    578 ; CHECK-NOF16-DAG:  mov.b32        {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[A]]
    579 ; CHECK-NOF16-DAG:  mov.b32        {[[B0:%h[0-9]+]], [[B1:%h[0-9]+]]}, [[B]]
    580 ; CHECK-NOF16-DAG:  cvt.f32.f16    [[FA0:%f[0-9]+]], [[A0]]
    581 ; CHECK-NOF16-DAG:  cvt.f32.f16    [[FB0:%f[0-9]+]], [[B0]]
    582 ; CHECK-NOF16-DAG:  cvt.f32.f16    [[FA1:%f[0-9]+]], [[A1]]
    583 ; CHECK-NOF16-DAG:  cvt.f32.f16    [[FB1:%f[0-9]+]], [[B1]]
    584 ; CHECK-NOF16-DAG:  setp.leu.f32   [[P0:%p[0-9]+]], [[FA0]], [[FB0]]
    585 ; CHECK-NOF16-DAG:  setp.leu.f32   [[P1:%p[0-9]+]], [[FA1]], [[FB1]]
    586 ; CHECK-DAG:  selp.u16        [[R0:%rs[0-9]+]], -1, 0, [[P0]];
    587 ; CHECK-DAG:  selp.u16        [[R1:%rs[0-9]+]], -1, 0, [[P1]];
    588 ; CHECK-NEXT: st.param.v2.b8  [func_retval0+0], {[[R0]], [[R1]]};
    589 ; CHECK-NEXT: ret;
     590 define <2 x i1> @test_fcmp_ule(<2 x half> %a, <2 x half> %b) #0 { ; vector fcmp ule; the checks above expect setp.leu (one f16x2 compare under the F16 prefix, per-lane f32 compares under NOF16)
     591   %r = fcmp ule <2 x half> %a, %b ; per lane: true when the operands are unordered or %a <= %b
     592   ret <2 x i1> %r ; the checks expect each lane materialized as -1/0 via selp.u16 and stored with st.param.v2.b8
     593 }
    594 
    595 
    596 ; CHECK-LABEL: test_fcmp_uno(
    597 ; CHECK-DAG:  ld.param.b32    [[A:%hh[0-9]+]], [test_fcmp_uno_param_0];
    598 ; CHECK-DAG:  ld.param.b32    [[B:%hh[0-9]+]], [test_fcmp_uno_param_1];
    599 ; CHECK-F16:  setp.nan.f16x2  [[P0:%p[0-9]+]]|[[P1:%p[0-9]+]], [[A]], [[B]]
    600 ; CHECK-NOF16-DAG:  mov.b32        {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[A]]
    601 ; CHECK-NOF16-DAG:  mov.b32        {[[B0:%h[0-9]+]], [[B1:%h[0-9]+]]}, [[B]]
    602 ; CHECK-NOF16-DAG:  cvt.f32.f16    [[FA0:%f[0-9]+]], [[A0]]
    603 ; CHECK-NOF16-DAG:  cvt.f32.f16    [[FB0:%f[0-9]+]], [[B0]]
    604 ; CHECK-NOF16-DAG:  cvt.f32.f16    [[FA1:%f[0-9]+]], [[A1]]
    605 ; CHECK-NOF16-DAG:  cvt.f32.f16    [[FB1:%f[0-9]+]], [[B1]]
    606 ; CHECK-NOF16-DAG:  setp.nan.f32   [[P0:%p[0-9]+]], [[FA0]], [[FB0]]
    607 ; CHECK-NOF16-DAG:  setp.nan.f32   [[P1:%p[0-9]+]], [[FA1]], [[FB1]]
    608 ; CHECK-DAG:  selp.u16        [[R0:%rs[0-9]+]], -1, 0, [[P0]];
    609 ; CHECK-DAG:  selp.u16        [[R1:%rs[0-9]+]], -1, 0, [[P1]];
    610 ; CHECK-NEXT: st.param.v2.b8  [func_retval0+0], {[[R0]], [[R1]]};
    611 ; CHECK-NEXT: ret;
     612 define <2 x i1> @test_fcmp_uno(<2 x half> %a, <2 x half> %b) #0 { ; vector fcmp uno; the checks above expect setp.nan (one f16x2 compare under the F16 prefix, per-lane f32 compares under NOF16)
     613   %r = fcmp uno <2 x half> %a, %b ; per lane: true when the operands are unordered (at least one is NaN)
     614   ret <2 x i1> %r ; the checks expect each lane materialized as -1/0 via selp.u16 and stored with st.param.v2.b8
     615 }
    616 
    617 ; CHECK-LABEL: test_fcmp_one(
    618 ; CHECK-DAG:  ld.param.b32    [[A:%hh[0-9]+]], [test_fcmp_one_param_0];
    619 ; CHECK-DAG:  ld.param.b32    [[B:%hh[0-9]+]], [test_fcmp_one_param_1];
    620 ; CHECK-F16:  setp.ne.f16x2  [[P0:%p[0-9]+]]|[[P1:%p[0-9]+]], [[A]], [[B]]
    621 ; CHECK-NOF16-DAG:  mov.b32        {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[A]]
    622 ; CHECK-NOF16-DAG:  mov.b32        {[[B0:%h[0-9]+]], [[B1:%h[0-9]+]]}, [[B]]
    623 ; CHECK-NOF16-DAG:  cvt.f32.f16    [[FA0:%f[0-9]+]], [[A0]]
    624 ; CHECK-NOF16-DAG:  cvt.f32.f16    [[FB0:%f[0-9]+]], [[B0]]
    625 ; CHECK-NOF16-DAG:  cvt.f32.f16    [[FA1:%f[0-9]+]], [[A1]]
    626 ; CHECK-NOF16-DAG:  cvt.f32.f16    [[FB1:%f[0-9]+]], [[B1]]
    627 ; CHECK-NOF16-DAG:  setp.ne.f32   [[P0:%p[0-9]+]], [[FA0]], [[FB0]]
    628 ; CHECK-NOF16-DAG:  setp.ne.f32   [[P1:%p[0-9]+]], [[FA1]], [[FB1]]
    629 ; CHECK-DAG:  selp.u16        [[R0:%rs[0-9]+]], -1, 0, [[P0]];
    630 ; CHECK-DAG:  selp.u16        [[R1:%rs[0-9]+]], -1, 0, [[P1]];
    631 ; CHECK-NEXT: st.param.v2.b8  [func_retval0+0], {[[R0]], [[R1]]};
    632 ; CHECK-NEXT: ret;
     633 define <2 x i1> @test_fcmp_one(<2 x half> %a, <2 x half> %b) #0 { ; vector fcmp one; the checks above expect setp.ne (one f16x2 compare under the F16 prefix, per-lane f32 compares under NOF16)
     634   %r = fcmp one <2 x half> %a, %b ; per lane: true when the operands are ordered and %a != %b
     635   ret <2 x i1> %r ; the checks expect each lane materialized as -1/0 via selp.u16 and stored with st.param.v2.b8
     636 }
    637 
    638 ; CHECK-LABEL: test_fcmp_oeq(
    639 ; CHECK-DAG:  ld.param.b32    [[A:%hh[0-9]+]], [test_fcmp_oeq_param_0];
    640 ; CHECK-DAG:  ld.param.b32    [[B:%hh[0-9]+]], [test_fcmp_oeq_param_1];
    641 ; CHECK-F16:  setp.eq.f16x2  [[P0:%p[0-9]+]]|[[P1:%p[0-9]+]], [[A]], [[B]]
    642 ; CHECK-NOF16-DAG:  mov.b32        {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[A]]
    643 ; CHECK-NOF16-DAG:  mov.b32        {[[B0:%h[0-9]+]], [[B1:%h[0-9]+]]}, [[B]]
    644 ; CHECK-NOF16-DAG:  cvt.f32.f16    [[FA0:%f[0-9]+]], [[A0]]
    645 ; CHECK-NOF16-DAG:  cvt.f32.f16    [[FB0:%f[0-9]+]], [[B0]]
    646 ; CHECK-NOF16-DAG:  cvt.f32.f16    [[FA1:%f[0-9]+]], [[A1]]
    647 ; CHECK-NOF16-DAG:  cvt.f32.f16    [[FB1:%f[0-9]+]], [[B1]]
    648 ; CHECK-NOF16-DAG:  setp.eq.f32   [[P0:%p[0-9]+]], [[FA0]], [[FB0]]
    649 ; CHECK-NOF16-DAG:  setp.eq.f32   [[P1:%p[0-9]+]], [[FA1]], [[FB1]]
    650 ; CHECK-DAG:  selp.u16        [[R0:%rs[0-9]+]], -1, 0, [[P0]];
    651 ; CHECK-DAG:  selp.u16        [[R1:%rs[0-9]+]], -1, 0, [[P1]];
    652 ; CHECK-NEXT: st.param.v2.b8  [func_retval0+0], {[[R0]], [[R1]]};
    653 ; CHECK-NEXT: ret;
     654 define <2 x i1> @test_fcmp_oeq(<2 x half> %a, <2 x half> %b) #0 { ; vector fcmp oeq; the checks above expect setp.eq (one f16x2 compare under the F16 prefix, per-lane f32 compares under NOF16)
     655   %r = fcmp oeq <2 x half> %a, %b ; per lane: true when the operands are ordered and %a == %b
     656   ret <2 x i1> %r ; the checks expect each lane materialized as -1/0 via selp.u16 and stored with st.param.v2.b8
     657 }
    658 
    659 ; CHECK-LABEL: test_fcmp_ogt(
    660 ; CHECK-DAG:  ld.param.b32    [[A:%hh[0-9]+]], [test_fcmp_ogt_param_0];
    661 ; CHECK-DAG:  ld.param.b32    [[B:%hh[0-9]+]], [test_fcmp_ogt_param_1];
    662 ; CHECK-F16:  setp.gt.f16x2  [[P0:%p[0-9]+]]|[[P1:%p[0-9]+]], [[A]], [[B]]
    663 ; CHECK-NOF16-DAG:  mov.b32        {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[A]]
    664 ; CHECK-NOF16-DAG:  mov.b32        {[[B0:%h[0-9]+]], [[B1:%h[0-9]+]]}, [[B]]
    665 ; CHECK-NOF16-DAG:  cvt.f32.f16    [[FA0:%f[0-9]+]], [[A0]]
    666 ; CHECK-NOF16-DAG:  cvt.f32.f16    [[FB0:%f[0-9]+]], [[B0]]
    667 ; CHECK-NOF16-DAG:  cvt.f32.f16    [[FA1:%f[0-9]+]], [[A1]]
    668 ; CHECK-NOF16-DAG:  cvt.f32.f16    [[FB1:%f[0-9]+]], [[B1]]
    669 ; CHECK-NOF16-DAG:  setp.gt.f32   [[P0:%p[0-9]+]], [[FA0]], [[FB0]]
    670 ; CHECK-NOF16-DAG:  setp.gt.f32   [[P1:%p[0-9]+]], [[FA1]], [[FB1]]
    671 ; CHECK-DAG:  selp.u16        [[R0:%rs[0-9]+]], -1, 0, [[P0]];
    672 ; CHECK-DAG:  selp.u16        [[R1:%rs[0-9]+]], -1, 0, [[P1]];
    673 ; CHECK-NEXT: st.param.v2.b8  [func_retval0+0], {[[R0]], [[R1]]};
    674 ; CHECK-NEXT: ret;
     675 define <2 x i1> @test_fcmp_ogt(<2 x half> %a, <2 x half> %b) #0 { ; vector fcmp ogt; the checks above expect setp.gt (one f16x2 compare under the F16 prefix, per-lane f32 compares under NOF16)
     676   %r = fcmp ogt <2 x half> %a, %b ; per lane: true when the operands are ordered and %a > %b
     677   ret <2 x i1> %r ; the checks expect each lane materialized as -1/0 via selp.u16 and stored with st.param.v2.b8
     678 }
    679 
    680 ; CHECK-LABEL: test_fcmp_oge(
    681 ; CHECK-DAG:  ld.param.b32    [[A:%hh[0-9]+]], [test_fcmp_oge_param_0];
    682 ; CHECK-DAG:  ld.param.b32    [[B:%hh[0-9]+]], [test_fcmp_oge_param_1];
    683 ; CHECK-F16:  setp.ge.f16x2  [[P0:%p[0-9]+]]|[[P1:%p[0-9]+]], [[A]], [[B]]
    684 ; CHECK-NOF16-DAG:  mov.b32        {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[A]]
    685 ; CHECK-NOF16-DAG:  mov.b32        {[[B0:%h[0-9]+]], [[B1:%h[0-9]+]]}, [[B]]
    686 ; CHECK-NOF16-DAG:  cvt.f32.f16    [[FA0:%f[0-9]+]], [[A0]]
    687 ; CHECK-NOF16-DAG:  cvt.f32.f16    [[FB0:%f[0-9]+]], [[B0]]
    688 ; CHECK-NOF16-DAG:  cvt.f32.f16    [[FA1:%f[0-9]+]], [[A1]]
    689 ; CHECK-NOF16-DAG:  cvt.f32.f16    [[FB1:%f[0-9]+]], [[B1]]
    690 ; CHECK-NOF16-DAG:  setp.ge.f32   [[P0:%p[0-9]+]], [[FA0]], [[FB0]]
    691 ; CHECK-NOF16-DAG:  setp.ge.f32   [[P1:%p[0-9]+]], [[FA1]], [[FB1]]
    692 ; CHECK-DAG:  selp.u16        [[R0:%rs[0-9]+]], -1, 0, [[P0]];
    693 ; CHECK-DAG:  selp.u16        [[R1:%rs[0-9]+]], -1, 0, [[P1]];
    694 ; CHECK-NEXT: st.param.v2.b8  [func_retval0+0], {[[R0]], [[R1]]};
    695 ; CHECK-NEXT: ret;
     696 define <2 x i1> @test_fcmp_oge(<2 x half> %a, <2 x half> %b) #0 { ; vector fcmp oge; the checks above expect setp.ge (one f16x2 compare under the F16 prefix, per-lane f32 compares under NOF16)
     697   %r = fcmp oge <2 x half> %a, %b ; per lane: true when the operands are ordered and %a >= %b
     698   ret <2 x i1> %r ; the checks expect each lane materialized as -1/0 via selp.u16 and stored with st.param.v2.b8
     699 }
    700 
    701 ; CHECK-LABEL: test_fcmp_olt(
    702 ; CHECK-DAG:  ld.param.b32    [[A:%hh[0-9]+]], [test_fcmp_olt_param_0];
    703 ; CHECK-DAG:  ld.param.b32    [[B:%hh[0-9]+]], [test_fcmp_olt_param_1];
    704 ; CHECK-F16:  setp.lt.f16x2  [[P0:%p[0-9]+]]|[[P1:%p[0-9]+]], [[A]], [[B]]
    705 ; CHECK-NOF16-DAG:  mov.b32        {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[A]]
    706 ; CHECK-NOF16-DAG:  mov.b32        {[[B0:%h[0-9]+]], [[B1:%h[0-9]+]]}, [[B]]
    707 ; CHECK-NOF16-DAG:  cvt.f32.f16    [[FA0:%f[0-9]+]], [[A0]]
    708 ; CHECK-NOF16-DAG:  cvt.f32.f16    [[FB0:%f[0-9]+]], [[B0]]
    709 ; CHECK-NOF16-DAG:  cvt.f32.f16    [[FA1:%f[0-9]+]], [[A1]]
    710 ; CHECK-NOF16-DAG:  cvt.f32.f16    [[FB1:%f[0-9]+]], [[B1]]
    711 ; CHECK-NOF16-DAG:  setp.lt.f32   [[P0:%p[0-9]+]], [[FA0]], [[FB0]]
    712 ; CHECK-NOF16-DAG:  setp.lt.f32   [[P1:%p[0-9]+]], [[FA1]], [[FB1]]
    713 ; CHECK-DAG:  selp.u16        [[R0:%rs[0-9]+]], -1, 0, [[P0]];
    714 ; CHECK-DAG:  selp.u16        [[R1:%rs[0-9]+]], -1, 0, [[P1]];
    715 ; CHECK-NEXT: st.param.v2.b8  [func_retval0+0], {[[R0]], [[R1]]};
    716 ; CHECK-NEXT: ret;
     717 define <2 x i1> @test_fcmp_olt(<2 x half> %a, <2 x half> %b) #0 { ; vector fcmp olt; the checks above expect setp.lt (one f16x2 compare under the F16 prefix, per-lane f32 compares under NOF16)
     718   %r = fcmp olt <2 x half> %a, %b ; per lane: true when the operands are ordered and %a < %b
     719   ret <2 x i1> %r ; the checks expect each lane materialized as -1/0 via selp.u16 and stored with st.param.v2.b8
     720 }
    721 
    722 ; XCHECK-LABEL: test_fcmp_ole(
    723 ; CHECK-DAG:  ld.param.b32    [[A:%hh[0-9]+]], [test_fcmp_ole_param_0];
    724 ; CHECK-DAG:  ld.param.b32    [[B:%hh[0-9]+]], [test_fcmp_ole_param_1];
    725 ; CHECK-F16:  setp.le.f16x2  [[P0:%p[0-9]+]]|[[P1:%p[0-9]+]], [[A]], [[B]]
    726 ; CHECK-NOF16-DAG:  mov.b32        {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[A]]
    727 ; CHECK-NOF16-DAG:  mov.b32        {[[B0:%h[0-9]+]], [[B1:%h[0-9]+]]}, [[B]]
    728 ; CHECK-NOF16-DAG:  cvt.f32.f16    [[FA0:%f[0-9]+]], [[A0]]
    729 ; CHECK-NOF16-DAG:  cvt.f32.f16    [[FB0:%f[0-9]+]], [[B0]]
    730 ; CHECK-NOF16-DAG:  cvt.f32.f16    [[FA1:%f[0-9]+]], [[A1]]
    731 ; CHECK-NOF16-DAG:  cvt.f32.f16    [[FB1:%f[0-9]+]], [[B1]]
    732 ; CHECK-NOF16-DAG:  setp.le.f32   [[P0:%p[0-9]+]], [[FA0]], [[FB0]]
    733 ; CHECK-NOF16-DAG:  setp.le.f32   [[P1:%p[0-9]+]], [[FA1]], [[FB1]]
    734 ; CHECK-DAG:  selp.u16        [[R0:%rs[0-9]+]], -1, 0, [[P0]];
    735 ; CHECK-DAG:  selp.u16        [[R1:%rs[0-9]+]], -1, 0, [[P1]];
    736 ; CHECK-NEXT: st.param.v2.b8  [func_retval0+0], {[[R0]], [[R1]]};
    737 ; CHECK-NEXT: ret;
    738 define <2 x i1> @test_fcmp_ole(<2 x half> %a, <2 x half> %b) #0 {
    739   %r = fcmp ole <2 x half> %a, %b
    740   ret <2 x i1> %r
    741 }
    742 
    743 ; CHECK-LABEL: test_fcmp_ord(
    744 ; CHECK-DAG:  ld.param.b32    [[A:%hh[0-9]+]], [test_fcmp_ord_param_0];
    745 ; CHECK-DAG:  ld.param.b32    [[B:%hh[0-9]+]], [test_fcmp_ord_param_1];
    746 ; CHECK-F16:  setp.num.f16x2  [[P0:%p[0-9]+]]|[[P1:%p[0-9]+]], [[A]], [[B]]
    747 ; CHECK-NOF16-DAG:  mov.b32        {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[A]]
    748 ; CHECK-NOF16-DAG:  mov.b32        {[[B0:%h[0-9]+]], [[B1:%h[0-9]+]]}, [[B]]
    749 ; CHECK-NOF16-DAG:  cvt.f32.f16    [[FA0:%f[0-9]+]], [[A0]]
    750 ; CHECK-NOF16-DAG:  cvt.f32.f16    [[FB0:%f[0-9]+]], [[B0]]
    751 ; CHECK-NOF16-DAG:  cvt.f32.f16    [[FA1:%f[0-9]+]], [[A1]]
    752 ; CHECK-NOF16-DAG:  cvt.f32.f16    [[FB1:%f[0-9]+]], [[B1]]
    753 ; CHECK-NOF16-DAG:  setp.num.f32   [[P0:%p[0-9]+]], [[FA0]], [[FB0]]
    754 ; CHECK-NOF16-DAG:  setp.num.f32   [[P1:%p[0-9]+]], [[FA1]], [[FB1]]
    755 ; CHECK-DAG:  selp.u16        [[R0:%rs[0-9]+]], -1, 0, [[P0]];
    756 ; CHECK-DAG:  selp.u16        [[R1:%rs[0-9]+]], -1, 0, [[P1]];
    757 ; CHECK-NEXT: st.param.v2.b8  [func_retval0+0], {[[R0]], [[R1]]};
    758 ; CHECK-NEXT: ret;
     759 define <2 x i1> @test_fcmp_ord(<2 x half> %a, <2 x half> %b) #0 { ; vector fcmp ord; the checks above expect setp.num (one f16x2 compare under the F16 prefix, per-lane f32 compares under NOF16)
     760   %r = fcmp ord <2 x half> %a, %b ; per lane: true when the operands are ordered (neither is NaN)
     761   ret <2 x i1> %r ; the checks expect each lane materialized as -1/0 via selp.u16 and stored with st.param.v2.b8
     762 }
    763 
    764 ; CHECK-LABEL: test_fptosi_i32(
    765 ; CHECK:      ld.param.b32    [[A:%hh[0-9]+]], [test_fptosi_i32_param_0];
    766 ; CHECK:      mov.b32         {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[A]]
    767 ; CHECK-DAG:  cvt.rzi.s32.f16 [[R0:%r[0-9]+]], [[A0]];
    768 ; CHECK-DAG:  cvt.rzi.s32.f16 [[R1:%r[0-9]+]], [[A1]];
    769 ; CHECK:      st.param.v2.b32 [func_retval0+0], {[[R0]], [[R1]]}
    770 ; CHECK:      ret;
     771 define <2 x i32> @test_fptosi_i32(<2 x half> %a) #0 { ; the checks above expect the packed pair to be split and each half converted with cvt.rzi.s32.f16
     772   %r = fptosi <2 x half> %a to <2 x i32> ; signed float-to-int conversion, one i32 per lane
     773   ret <2 x i32> %r ; the checks expect a single st.param.v2.b32 of both lanes
     774 }
    775 
    776 ; CHECK-LABEL: test_fptosi_i64(
    777 ; CHECK:      ld.param.b32    [[A:%hh[0-9]+]], [test_fptosi_i64_param_0];
    778 ; CHECK:      mov.b32         {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[A]]
    779 ; CHECK-DAG:  cvt.rzi.s64.f16 [[R0:%rd[0-9]+]], [[A0]];
    780 ; CHECK-DAG:  cvt.rzi.s64.f16 [[R1:%rd[0-9]+]], [[A1]];
    781 ; CHECK:      st.param.v2.b64 [func_retval0+0], {[[R0]], [[R1]]}
    782 ; CHECK:      ret;
     783 define <2 x i64> @test_fptosi_i64(<2 x half> %a) #0 { ; the checks above expect the packed pair to be split and each half converted with cvt.rzi.s64.f16
     784   %r = fptosi <2 x half> %a to <2 x i64> ; signed float-to-int conversion, one i64 per lane
     785   ret <2 x i64> %r ; the checks expect a single st.param.v2.b64 of both lanes
     786 }
    787 
    788 ; CHECK-LABEL: test_fptoui_2xi32(
    789 ; CHECK:      ld.param.b32    [[A:%hh[0-9]+]], [test_fptoui_2xi32_param_0];
    790 ; CHECK:      mov.b32         {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[A]]
    791 ; CHECK-DAG:  cvt.rzi.u32.f16 [[R0:%r[0-9]+]], [[A0]];
    792 ; CHECK-DAG:  cvt.rzi.u32.f16 [[R1:%r[0-9]+]], [[A1]];
    793 ; CHECK:      st.param.v2.b32 [func_retval0+0], {[[R0]], [[R1]]}
    794 ; CHECK:      ret;
     795 define <2 x i32> @test_fptoui_2xi32(<2 x half> %a) #0 { ; the checks above expect the packed pair to be split and each half converted with cvt.rzi.u32.f16
     796   %r = fptoui <2 x half> %a to <2 x i32> ; unsigned float-to-int conversion, one i32 per lane
     797   ret <2 x i32> %r ; the checks expect a single st.param.v2.b32 of both lanes
     798 }
    799 
    800 ; CHECK-LABEL: test_fptoui_2xi64(
    801 ; CHECK:      ld.param.b32    [[A:%hh[0-9]+]], [test_fptoui_2xi64_param_0];
    802 ; CHECK:      mov.b32         {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[A]]
    803 ; CHECK-DAG:  cvt.rzi.u64.f16 [[R0:%rd[0-9]+]], [[A0]];
    804 ; CHECK-DAG:  cvt.rzi.u64.f16 [[R1:%rd[0-9]+]], [[A1]];
    805 ; CHECK:      st.param.v2.b64 [func_retval0+0], {[[R0]], [[R1]]}
    806 ; CHECK:      ret;
     807 define <2 x i64> @test_fptoui_2xi64(<2 x half> %a) #0 { ; the checks above expect the packed pair to be split and each half converted with cvt.rzi.u64.f16
     808   %r = fptoui <2 x half> %a to <2 x i64> ; unsigned float-to-int conversion, one i64 per lane
     809   ret <2 x i64> %r ; the checks expect a single st.param.v2.b64 of both lanes
     810 }
    811 
    812 ; CHECK-LABEL: test_uitofp_2xi32(
    813 ; CHECK:      ld.param.v2.u32 {[[A0:%r[0-9]+]], [[A1:%r[0-9]+]]}, [test_uitofp_2xi32_param_0];
    814 ; CHECK-DAG:  cvt.rn.f16.u32  [[R0:%h[0-9]+]], [[A0]];
    815 ; CHECK-DAG:  cvt.rn.f16.u32  [[R1:%h[0-9]+]], [[A1]];
    816 ; CHECK:      mov.b32         [[R:%hh[0-9]+]], {[[R0]], [[R1]]}
    817 ; CHECK:      st.param.b32    [func_retval0+0], [[R]];
    818 ; CHECK:      ret;
     819 define <2 x half> @test_uitofp_2xi32(<2 x i32> %a) #0 { ; the checks above expect a direct cvt.rn.f16.u32 per lane, then a mov.b32 that packs the two halves
     820   %r = uitofp <2 x i32> %a to <2 x half> ; unsigned int-to-float conversion, one half per lane
     821   ret <2 x half> %r ; the packed pair is returned through one st.param.b32 per the checks
     822 }
    823 
    824 ; CHECK-LABEL: test_uitofp_2xi64(
    825 ; CHECK:      ld.param.v2.u64 {[[A0:%rd[0-9]+]], [[A1:%rd[0-9]+]]}, [test_uitofp_2xi64_param_0];
    826 ; CHECK-DAG:  cvt.rn.f32.u64  [[F0:%f[0-9]+]], [[A0]];
    827 ; CHECK-DAG:  cvt.rn.f32.u64  [[F1:%f[0-9]+]], [[A1]];
    828 ; CHECK-DAG:  cvt.rn.f16.f32  [[R0:%h[0-9]+]], [[F0]];
    829 ; CHECK-DAG:  cvt.rn.f16.f32  [[R1:%h[0-9]+]], [[F1]];
    830 ; CHECK:      mov.b32         [[R:%hh[0-9]+]], {[[R0]], [[R1]]}
    831 ; CHECK:      st.param.b32    [func_retval0+0], [[R]];
    832 ; CHECK:      ret;
     833 define <2 x half> @test_uitofp_2xi64(<2 x i64> %a) #0 { ; the checks above expect a two-step conversion per lane: cvt.rn.f32.u64 followed by cvt.rn.f16.f32
     834   %r = uitofp <2 x i64> %a to <2 x half> ; unsigned int-to-float conversion, one half per lane
     835   ret <2 x half> %r ; the packed pair is returned through one st.param.b32 per the checks
     836 }
    837 
    838 ; CHECK-LABEL: test_sitofp_2xi32(
    839 ; CHECK:      ld.param.v2.u32 {[[A0:%r[0-9]+]], [[A1:%r[0-9]+]]}, [test_sitofp_2xi32_param_0];
    840 ; CHECK-DAG:  cvt.rn.f16.s32  [[R0:%h[0-9]+]], [[A0]];
    841 ; CHECK-DAG:  cvt.rn.f16.s32  [[R1:%h[0-9]+]], [[A1]];
    842 ; CHECK:      mov.b32         [[R:%hh[0-9]+]], {[[R0]], [[R1]]}
    843 ; CHECK:      st.param.b32    [func_retval0+0], [[R]];
    844 ; CHECK:      ret;
     845 define <2 x half> @test_sitofp_2xi32(<2 x i32> %a) #0 { ; the checks above expect a direct cvt.rn.f16.s32 per lane, then a mov.b32 that packs the two halves
     846   %r = sitofp <2 x i32> %a to <2 x half> ; signed int-to-float conversion, one half per lane
     847   ret <2 x half> %r ; the packed pair is returned through one st.param.b32 per the checks
     848 }
    849 
    850 ; CHECK-LABEL: test_sitofp_2xi64(
    851 ; CHECK:      ld.param.v2.u64 {[[A0:%rd[0-9]+]], [[A1:%rd[0-9]+]]}, [test_sitofp_2xi64_param_0];
    852 ; CHECK-DAG:  cvt.rn.f32.s64  [[F0:%f[0-9]+]], [[A0]];
    853 ; CHECK-DAG:  cvt.rn.f32.s64  [[F1:%f[0-9]+]], [[A1]];
    854 ; CHECK-DAG:  cvt.rn.f16.f32  [[R0:%h[0-9]+]], [[F0]];
    855 ; CHECK-DAG:  cvt.rn.f16.f32  [[R1:%h[0-9]+]], [[F1]];
    856 ; CHECK:      mov.b32         [[R:%hh[0-9]+]], {[[R0]], [[R1]]}
    857 ; CHECK:      st.param.b32    [func_retval0+0], [[R]];
    858 ; CHECK:      ret;
     859 define <2 x half> @test_sitofp_2xi64(<2 x i64> %a) #0 { ; the checks above expect a two-step conversion per lane: cvt.rn.f32.s64 followed by cvt.rn.f16.f32
     860   %r = sitofp <2 x i64> %a to <2 x half> ; signed int-to-float conversion, one half per lane
     861   ret <2 x half> %r ; the packed pair is returned through one st.param.b32 per the checks
     862 }
    863 
    864 ; CHECK-LABEL: test_uitofp_2xi32_fadd(
    865 ; CHECK-DAG:  ld.param.v2.u32 {[[A0:%r[0-9]+]], [[A1:%r[0-9]+]]}, [test_uitofp_2xi32_fadd_param_0];
    866 ; CHECK-DAG:  ld.param.b32    [[B:%hh[0-9]+]], [test_uitofp_2xi32_fadd_param_1];
    867 ; CHECK-DAG:  cvt.rn.f16.u32  [[C0:%h[0-9]+]], [[A0]];
    868 ; CHECK-DAG:  cvt.rn.f16.u32  [[C1:%h[0-9]+]], [[A1]];
     869 ;
    870 ; CHECK-F16-DAG:  mov.b32         [[C:%hh[0-9]+]], {[[C0]], [[C1]]}
    871 ; CHECK-F16-DAG:  add.rn.f16x2    [[R:%hh[0-9]+]], [[B]], [[C]];
    872 ;
    873 ; CHECK-NOF16-DAG:  mov.b32        {[[B0:%h[0-9]+]], [[B1:%h[0-9]+]]}, [[B]]
    874 ; CHECK-NOF16-DAG:  cvt.f32.f16    [[FB0:%f[0-9]+]], [[B0]]
    875 ; CHECK-NOF16-DAG:  cvt.f32.f16    [[FB1:%f[0-9]+]], [[B1]]
    876 ; CHECK-NOF16-DAG:  cvt.f32.f16    [[FC0:%f[0-9]+]], [[C0]]
    877 ; CHECK-NOF16-DAG:  cvt.f32.f16    [[FC1:%f[0-9]+]], [[C1]]
    878 ; CHECK-NOF16-DAG:  add.rn.f32     [[FR0:%f[0-9]+]], [[FB0]], [[FC0]];
    879 ; CHECK-NOF16-DAG:  add.rn.f32     [[FR1:%f[0-9]+]], [[FB1]], [[FC1]];
    880 ; CHECK-NOF16-DAG:  cvt.rn.f16.f32 [[R0:%h[0-9]+]], [[FR0]]
    881 ; CHECK-NOF16-DAG:  cvt.rn.f16.f32 [[R1:%h[0-9]+]], [[FR1]]
    882 ; CHECK-NOF16:      mov.b32        [[R:%hh[0-9]+]], {[[R0]], [[R1]]}
    883 ;
    884 ; CHECK:      st.param.b32    [func_retval0+0], [[R]];
    885 ; CHECK:      ret;
     886 define <2 x half> @test_uitofp_2xi32_fadd(<2 x i32> %a, <2 x half> %b) #0 { ; conversion result feeds a vector fadd; the checks above expect add.rn.f16x2 under the F16 prefix and per-lane add.rn.f32 under NOF16
     887   %c = uitofp <2 x i32> %a to <2 x half> ; checks expect cvt.rn.f16.u32 per lane for every prefix
     888   %r = fadd <2 x half> %b, %c ; operand order (%b first) matches the [[B]], [[C]] order in the add checks
     889   ret <2 x half> %r
     890 }
    891 
    892 ; CHECK-LABEL: test_sitofp_2xi32_fadd(
    893 ; CHECK-DAG:  ld.param.v2.u32 {[[A0:%r[0-9]+]], [[A1:%r[0-9]+]]}, [test_sitofp_2xi32_fadd_param_0];
    894 ; CHECK-DAG:  ld.param.b32    [[B:%hh[0-9]+]], [test_sitofp_2xi32_fadd_param_1];
    895 ; CHECK-DAG:  cvt.rn.f16.s32  [[C0:%h[0-9]+]], [[A0]];
    896 ; CHECK-DAG:  cvt.rn.f16.s32  [[C1:%h[0-9]+]], [[A1]];
    897 ;
    898 ; CHECK-F16-DAG:  mov.b32         [[C:%hh[0-9]+]], {[[C0]], [[C1]]}
    899 ; CHECK-F16-DAG:  add.rn.f16x2    [[R:%hh[0-9]+]], [[B]], [[C]];
    900 ;
    901 ; CHECK-NOF16-DAG:  mov.b32        {[[B0:%h[0-9]+]], [[B1:%h[0-9]+]]}, [[B]]
    902 ; CHECK-NOF16-DAG:  cvt.f32.f16    [[FB0:%f[0-9]+]], [[B0]]
    903 ; CHECK-NOF16-DAG:  cvt.f32.f16    [[FB1:%f[0-9]+]], [[B1]]
    904 ; CHECK-NOF16-DAG:  cvt.f32.f16    [[FC0:%f[0-9]+]], [[C0]]
    905 ; CHECK-NOF16-DAG:  cvt.f32.f16    [[FC1:%f[0-9]+]], [[C1]]
    906 ; CHECK-NOF16-DAG:  add.rn.f32     [[FR0:%f[0-9]+]], [[FB0]], [[FC0]];
    907 ; CHECK-NOF16-DAG:  add.rn.f32     [[FR1:%f[0-9]+]], [[FB1]], [[FC1]];
    908 ; CHECK-NOF16-DAG:  cvt.rn.f16.f32 [[R0:%h[0-9]+]], [[FR0]]
    909 ; CHECK-NOF16-DAG:  cvt.rn.f16.f32 [[R1:%h[0-9]+]], [[FR1]]
    910 ; CHECK-NOF16:      mov.b32        [[R:%hh[0-9]+]], {[[R0]], [[R1]]}
    911 ;
    912 ; CHECK:      st.param.b32    [func_retval0+0], [[R]];
    913 ; CHECK:      ret;
     914 define <2 x half> @test_sitofp_2xi32_fadd(<2 x i32> %a, <2 x half> %b) #0 { ; conversion result feeds a vector fadd; the checks above expect add.rn.f16x2 under the F16 prefix and per-lane add.rn.f32 under NOF16
     915   %c = sitofp <2 x i32> %a to <2 x half> ; checks expect cvt.rn.f16.s32 per lane for every prefix
     916   %r = fadd <2 x half> %b, %c ; operand order (%b first) matches the [[B]], [[C]] order in the add checks
     917   ret <2 x half> %r
     918 }
    919 
    920 ; CHECK-LABEL: test_fptrunc_2xfloat(
    921 ; CHECK:      ld.param.v2.f32 {[[A0:%f[0-9]+]], [[A1:%f[0-9]+]]}, [test_fptrunc_2xfloat_param_0];
    922 ; CHECK-DAG:  cvt.rn.f16.f32  [[R0:%h[0-9]+]], [[A0]];
    923 ; CHECK-DAG:  cvt.rn.f16.f32  [[R1:%h[0-9]+]], [[A1]];
    924 ; CHECK:      mov.b32         [[R:%hh[0-9]+]], {[[R0]], [[R1]]}
    925 ; CHECK:      st.param.b32    [func_retval0+0], [[R]];
    926 ; CHECK:      ret;
     927 define <2 x half> @test_fptrunc_2xfloat(<2 x float> %a) #0 { ; the checks above expect a v2.f32 parameter load, cvt.rn.f16.f32 per lane, and a mov.b32 pack
     928   %r = fptrunc <2 x float> %a to <2 x half> ; narrow each float lane to half
     929   ret <2 x half> %r ; the packed pair is returned through one st.param.b32 per the checks
     930 }
    931 
    932 ; CHECK-LABEL: test_fptrunc_2xdouble(
    933 ; CHECK:      ld.param.v2.f64 {[[A0:%fd[0-9]+]], [[A1:%fd[0-9]+]]}, [test_fptrunc_2xdouble_param_0];
    934 ; CHECK-DAG:  cvt.rn.f16.f64  [[R0:%h[0-9]+]], [[A0]];
    935 ; CHECK-DAG:  cvt.rn.f16.f64  [[R1:%h[0-9]+]], [[A1]];
    936 ; CHECK:      mov.b32         [[R:%hh[0-9]+]], {[[R0]], [[R1]]}
    937 ; CHECK:      st.param.b32    [func_retval0+0], [[R]];
    938 ; CHECK:      ret;
     939 define <2 x half> @test_fptrunc_2xdouble(<2 x double> %a) #0 { ; the checks above expect a v2.f64 parameter load, cvt.rn.f16.f64 per lane, and a mov.b32 pack
     940   %r = fptrunc <2 x double> %a to <2 x half> ; narrow each double lane to half
     941   ret <2 x half> %r ; the packed pair is returned through one st.param.b32 per the checks
     942 }
    943 
    944 ; CHECK-LABEL: test_fpext_2xfloat(
    945 ; CHECK:      ld.param.b32    [[A:%hh[0-9]+]], [test_fpext_2xfloat_param_0];
    946 ; CHECK:      mov.b32         {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[A]]
    947 ; CHECK-DAG:  cvt.f32.f16     [[R0:%f[0-9]+]], [[A0]];
    948 ; CHECK-DAG:  cvt.f32.f16     [[R1:%f[0-9]+]], [[A1]];
    949 ; CHECK-NEXT: st.param.v2.f32 [func_retval0+0], {[[R0]], [[R1]]};
    950 ; CHECK:      ret;
     951 define <2 x float> @test_fpext_2xfloat(<2 x half> %a) #0 { ; the checks above expect the packed pair to be split and each half widened with cvt.f32.f16
     952   %r = fpext <2 x half> %a to <2 x float> ; widen each half lane to float
     953   ret <2 x float> %r ; the checks expect the store (st.param.v2.f32) to follow the conversions immediately
     954 }
    955 
    956 ; CHECK-LABEL: test_fpext_2xdouble(
    957 ; CHECK:      ld.param.b32    [[A:%hh[0-9]+]], [test_fpext_2xdouble_param_0];
    958 ; CHECK:      mov.b32         {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[A]]
    959 ; CHECK-DAG:  cvt.f64.f16     [[R0:%fd[0-9]+]], [[A0]];
    960 ; CHECK-DAG:  cvt.f64.f16     [[R1:%fd[0-9]+]], [[A1]];
    961 ; CHECK-NEXT: st.param.v2.f64 [func_retval0+0], {[[R0]], [[R1]]};
    962 ; CHECK:      ret;
     963 define <2 x double> @test_fpext_2xdouble(<2 x half> %a) #0 { ; the checks above expect the packed pair to be split and each half widened with cvt.f64.f16
     964   %r = fpext <2 x half> %a to <2 x double> ; widen each half lane to double
     965   ret <2 x double> %r ; the checks expect the store (st.param.v2.f64) to follow the conversions immediately
     966 }
    967 
    968 
    969 ; CHECK-LABEL: test_bitcast_2xhalf_to_2xi16(
    970 ; CHECK:      ld.param.u32    [[A:%r[0-9]+]], [test_bitcast_2xhalf_to_2xi16_param_0];
    971 ; CHECK-DAG:  cvt.u16.u32     [[R0:%rs[0-9]+]], [[A]]
    972 ; CHECK-DAG:  shr.u32         [[AH:%r[0-9]+]], [[A]], 16
    973 ; CHECK-DAG:  cvt.u16.u32     [[R1:%rs[0-9]+]], [[AH]]
    974 ; CHECK:      st.param.v2.b16 [func_retval0+0], {[[R0]], [[R1]]}
    975 ; CHECK:      ret;
     976 define <2 x i16> @test_bitcast_2xhalf_to_2xi16(<2 x half> %a) #0 { ; the checks above expect the argument loaded as a plain u32 and split with cvt.u16.u32 (low lane) and shr.u32 by 16 + cvt.u16.u32 (high lane)
     977   %r = bitcast <2 x half> %a to <2 x i16> ; reinterpret the bits; no value conversion
     978   ret <2 x i16> %r ; returned as st.param.v2.b16 per the checks
     979 }
    980 
    981 ; CHECK-LABEL: test_bitcast_2xi16_to_2xhalf(
    982 ; CHECK:      ld.param.v2.u16         {[[RS0:%rs[0-9]+]], [[RS1:%rs[0-9]+]]}, [test_bitcast_2xi16_to_2xhalf_param_0];
    983 ; CHECK-DAG:  cvt.u32.u16     [[R0:%r[0-9]+]], [[RS0]];
    984 ; CHECK-DAG:  cvt.u32.u16     [[R1:%r[0-9]+]], [[RS1]];
    985 ; CHECK-DAG:  shl.b32         [[R1H:%r[0-9]+]], [[R1]], 16;
    986 ; CHECK-DAG:  or.b32          [[R1H0L:%r[0-9]+]], [[R0]], [[R1H]];
    987 ; CHECK:      mov.b32         [[R:%hh[0-9]+]], [[R1H0L]];
    988 ; CHECK:      st.param.b32    [func_retval0+0], [[R]];
    989 ; CHECK:      ret;
     990 define <2 x half> @test_bitcast_2xi16_to_2xhalf(<2 x i16> %a) #0 { ; the checks above expect both u16 lanes widened to u32, the second shifted left by 16, the two OR-ed together, and the result moved into a packed-half register
     991   %r = bitcast <2 x i16> %a to <2 x half> ; reinterpret the bits; no value conversion
     992   ret <2 x half> %r ; returned as one st.param.b32 per the checks
     993 }
    994 
    995 
     996 declare <2 x half> @llvm.sqrt.f16(<2 x half> %a) #0 ; declarations for the intrinsic tests that follow; every name carries an .f16 suffix while the types are <2 x half> -- NOTE(review): presumably relies on LLVM remangling overloaded intrinsic names; confirm
     997 declare <2 x half> @llvm.powi.f16(<2 x half> %a, <2 x i32> %b) #0 ; in the visible part of the file only the commented-out test_powi refers to this
     998 declare <2 x half> @llvm.sin.f16(<2 x half> %a) #0
     999 declare <2 x half> @llvm.cos.f16(<2 x half> %a) #0
    1000 declare <2 x half> @llvm.pow.f16(<2 x half> %a, <2 x half> %b) #0 ; in the visible part of the file only the commented-out test_pow refers to this
    1001 declare <2 x half> @llvm.exp.f16(<2 x half> %a) #0 ; in the visible part of the file only the commented-out test_exp refers to this
    1002 declare <2 x half> @llvm.exp2.f16(<2 x half> %a) #0 ; in the visible part of the file only the commented-out test_exp2 refers to this
    1003 declare <2 x half> @llvm.log.f16(<2 x half> %a) #0 ; in the visible part of the file only the commented-out test_log refers to this
    1004 declare <2 x half> @llvm.log10.f16(<2 x half> %a) #0 ; in the visible part of the file only the commented-out test_log10 refers to this
    1005 declare <2 x half> @llvm.log2.f16(<2 x half> %a) #0 ; in the visible part of the file only the commented-out test_log2 refers to this
    1006 declare <2 x half> @llvm.fma.f16(<2 x half> %a, <2 x half> %b, <2 x half> %c) #0
    1007 declare <2 x half> @llvm.fabs.f16(<2 x half> %a) #0
    1008 declare <2 x half> @llvm.minnum.f16(<2 x half> %a, <2 x half> %b) #0
    1009 declare <2 x half> @llvm.maxnum.f16(<2 x half> %a, <2 x half> %b) #0
    1010 declare <2 x half> @llvm.copysign.f16(<2 x half> %a, <2 x half> %b) #0
    1011 declare <2 x half> @llvm.floor.f16(<2 x half> %a) #0
    1012 declare <2 x half> @llvm.ceil.f16(<2 x half> %a) #0
    1013 declare <2 x half> @llvm.trunc.f16(<2 x half> %a) #0
    1014 declare <2 x half> @llvm.rint.f16(<2 x half> %a) #0
    1015 declare <2 x half> @llvm.nearbyint.f16(<2 x half> %a) #0
    1016 declare <2 x half> @llvm.round.f16(<2 x half> %a) #0
    1017 declare <2 x half> @llvm.fmuladd.f16(<2 x half> %a, <2 x half> %b, <2 x half> %c) #0
   1018 
   1019 ; CHECK-LABEL: test_sqrt(
   1020 ; CHECK:      ld.param.b32    [[A:%hh[0-9]+]], [test_sqrt_param_0];
   1021 ; CHECK:      mov.b32         {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[A]]
   1022 ; CHECK-DAG:  cvt.f32.f16     [[AF0:%f[0-9]+]], [[A0]];
   1023 ; CHECK-DAG:  cvt.f32.f16     [[AF1:%f[0-9]+]], [[A1]];
   1024 ; CHECK-DAG:  sqrt.rn.f32     [[RF0:%f[0-9]+]], [[AF0]];
   1025 ; CHECK-DAG:  sqrt.rn.f32     [[RF1:%f[0-9]+]], [[AF1]];
   1026 ; CHECK-DAG:  cvt.rn.f16.f32  [[R0:%h[0-9]+]], [[RF0]];
   1027 ; CHECK-DAG:  cvt.rn.f16.f32  [[R1:%h[0-9]+]], [[RF1]];
   1028 ; CHECK:      mov.b32         [[R:%hh[0-9]+]], {[[R0]], [[R1]]}
   1029 ; CHECK:      st.param.b32    [func_retval0+0], [[R]];
   1030 ; CHECK:      ret;
    1031 define <2 x half> @test_sqrt(<2 x half> %a) #0 { ; for every prefix the checks above expect the lanes widened to f32, sqrt.rn.f32 applied, then narrowed with cvt.rn.f16.f32 and repacked
    1032   %r = call <2 x half> @llvm.sqrt.f16(<2 x half> %a) ; vector square-root intrinsic declared earlier in the file
    1033   ret <2 x half> %r ; the packed pair is returned through one st.param.b32 per the checks
    1034 }
   1035 
   1036 ;;; Can't do this yet: requires libcall.
   1037 ; XCHECK-LABEL: test_powi(
   1038 ;define <2 x half> @test_powi(<2 x half> %a, <2 x i32> %b) #0 {
   1039 ;  %r = call <2 x half> @llvm.powi.f16(<2 x half> %a, <2 x i32> %b)
   1040 ;  ret <2 x half> %r
   1041 ;}
   1042 
   1043 ; CHECK-LABEL: test_sin(
   1044 ; CHECK:      ld.param.b32    [[A:%hh[0-9]+]], [test_sin_param_0];
   1045 ; CHECK:      mov.b32         {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[A]]
   1046 ; CHECK-DAG:  cvt.f32.f16     [[AF0:%f[0-9]+]], [[A0]];
   1047 ; CHECK-DAG:  cvt.f32.f16     [[AF1:%f[0-9]+]], [[A1]];
   1048 ; CHECK-DAG:  sin.approx.f32  [[RF0:%f[0-9]+]], [[AF0]];
   1049 ; CHECK-DAG:  sin.approx.f32  [[RF1:%f[0-9]+]], [[AF1]];
   1050 ; CHECK-DAG:  cvt.rn.f16.f32  [[R0:%h[0-9]+]], [[RF0]];
   1051 ; CHECK-DAG:  cvt.rn.f16.f32  [[R1:%h[0-9]+]], [[RF1]];
   1052 ; CHECK:      mov.b32         [[R:%hh[0-9]+]], {[[R0]], [[R1]]}
   1053 ; CHECK:      st.param.b32    [func_retval0+0], [[R]];
   1054 ; CHECK:      ret;
    1055 define <2 x half> @test_sin(<2 x half> %a) #0 #1 { ; the checks above expect the lanes widened to f32, sin.approx.f32 applied, then narrowed and repacked; attribute group #1 is defined outside this excerpt -- NOTE(review): presumably it is what permits the approximate lowering; confirm
    1056   %r = call <2 x half> @llvm.sin.f16(<2 x half> %a) ; vector sine intrinsic declared earlier in the file
    1057   ret <2 x half> %r ; the packed pair is returned through one st.param.b32 per the checks
    1058 }
   1059 
   1060 ; CHECK-LABEL: test_cos(
   1061 ; CHECK:      ld.param.b32    [[A:%hh[0-9]+]], [test_cos_param_0];
   1062 ; CHECK:      mov.b32         {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[A]]
   1063 ; CHECK-DAG:  cvt.f32.f16     [[AF0:%f[0-9]+]], [[A0]];
   1064 ; CHECK-DAG:  cvt.f32.f16     [[AF1:%f[0-9]+]], [[A1]];
   1065 ; CHECK-DAG:  cos.approx.f32  [[RF0:%f[0-9]+]], [[AF0]];
   1066 ; CHECK-DAG:  cos.approx.f32  [[RF1:%f[0-9]+]], [[AF1]];
   1067 ; CHECK-DAG:  cvt.rn.f16.f32  [[R0:%h[0-9]+]], [[RF0]];
   1068 ; CHECK-DAG:  cvt.rn.f16.f32  [[R1:%h[0-9]+]], [[RF1]];
   1069 ; CHECK:      mov.b32         [[R:%hh[0-9]+]], {[[R0]], [[R1]]}
   1070 ; CHECK:      st.param.b32    [func_retval0+0], [[R]];
   1071 ; CHECK:      ret;
    1072 define <2 x half> @test_cos(<2 x half> %a) #0 #1 { ; the checks above expect the lanes widened to f32, cos.approx.f32 applied, then narrowed and repacked; attribute group #1 is defined outside this excerpt -- NOTE(review): presumably it is what permits the approximate lowering; confirm
    1073   %r = call <2 x half> @llvm.cos.f16(<2 x half> %a) ; vector cosine intrinsic declared earlier in the file
    1074   ret <2 x half> %r ; the packed pair is returned through one st.param.b32 per the checks
    1075 }
   1076 
   1077 ;;; Can't do this yet: requires libcall.
   1078 ; XCHECK-LABEL: test_pow(
   1079 ;define <2 x half> @test_pow(<2 x half> %a, <2 x half> %b) #0 {
   1080 ;  %r = call <2 x half> @llvm.pow.f16(<2 x half> %a, <2 x half> %b)
   1081 ;  ret <2 x half> %r
   1082 ;}
   1083 
   1084 ;;; Can't do this yet: requires libcall.
   1085 ; XCHECK-LABEL: test_exp(
   1086 ;define <2 x half> @test_exp(<2 x half> %a) #0 {
   1087 ;  %r = call <2 x half> @llvm.exp.f16(<2 x half> %a)
   1088 ;  ret <2 x half> %r
   1089 ;}
   1090 
   1091 ;;; Can't do this yet: requires libcall.
   1092 ; XCHECK-LABEL: test_exp2(
   1093 ;define <2 x half> @test_exp2(<2 x half> %a) #0 {
   1094 ;  %r = call <2 x half> @llvm.exp2.f16(<2 x half> %a)
   1095 ;  ret <2 x half> %r
   1096 ;}
   1097 
   1098 ;;; Can't do this yet: requires libcall.
   1099 ; XCHECK-LABEL: test_log(
   1100 ;define <2 x half> @test_log(<2 x half> %a) #0 {
   1101 ;  %r = call <2 x half> @llvm.log.f16(<2 x half> %a)
   1102 ;  ret <2 x half> %r
   1103 ;}
   1104 
   1105 ;;; Can't do this yet: requires libcall.
   1106 ; XCHECK-LABEL: test_log10(
   1107 ;define <2 x half> @test_log10(<2 x half> %a) #0 {
   1108 ;  %r = call <2 x half> @llvm.log10.f16(<2 x half> %a)
   1109 ;  ret <2 x half> %r
   1110 ;}
   1111 
   1112 ;;; Can't do this yet: requires libcall.
   1113 ; XCHECK-LABEL: test_log2(
   1114 ;define <2 x half> @test_log2(<2 x half> %a) #0 {
   1115 ;  %r = call <2 x half> @llvm.log2.f16(<2 x half> %a)
   1116 ;  ret <2 x half> %r
   1117 ;}
   1118 
   1119 ; CHECK-LABEL: test_fma(
   1120 ; CHECK-DAG:  ld.param.b32    [[A:%hh[0-9]+]], [test_fma_param_0];
   1121 ; CHECK-DAG:  ld.param.b32    [[B:%hh[0-9]+]], [test_fma_param_1];
   1122 ; CHECK-DAG:  ld.param.b32    [[C:%hh[0-9]+]], [test_fma_param_2];
   1123 ;
   1124 ; CHECK-F16:        fma.rn.f16x2   [[R:%hh[0-9]+]], [[A]], [[B]], [[C]];
   1125 ;
   1126 ; CHECK-NOF16-DAG:  mov.b32        {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[A]]
   1127 ; CHECK-NOF16-DAG:  mov.b32        {[[B0:%h[0-9]+]], [[B1:%h[0-9]+]]}, [[B]]
   1128 ; CHECK-NOF16-DAG:  mov.b32        {[[C0:%h[0-9]+]], [[C1:%h[0-9]+]]}, [[C]]
   1129 ; CHECK-NOF16-DAG:  cvt.f32.f16    [[FA0:%f[0-9]+]], [[A0]]
   1130 ; CHECK-NOF16-DAG:  cvt.f32.f16    [[FB0:%f[0-9]+]], [[B0]]
   1131 ; CHECK-NOF16-DAG:  cvt.f32.f16    [[FC0:%f[0-9]+]], [[C0]]
   1132 ; CHECK-NOF16-DAG:  cvt.f32.f16    [[FA1:%f[0-9]+]], [[A1]]
   1133 ; CHECK-NOF16-DAG:  cvt.f32.f16    [[FB1:%f[0-9]+]], [[B1]]
   1134 ; CHECK-NOF16-DAG:  cvt.f32.f16    [[FC0:%f[0-9]+]], [[C0]]
   1135 ; CHECK-NOF16-DAG:  fma.rn.f32     [[FR0:%f[0-9]+]], [[FA0]], [[FB0]], [[FC0]];
   1136 ; CHECK-NOF16-DAG:  fma.rn.f32     [[FR1:%f[0-9]+]], [[FA1]], [[FB1]], [[FC1]];
   1137 ; CHECK-NOF16-DAG:  cvt.rn.f16.f32 [[R0:%h[0-9]+]], [[FR0]]
   1138 ; CHECK-NOF16-DAG:  cvt.rn.f16.f32 [[R1:%h[0-9]+]], [[FR1]]
   1139 ; CHECK-NOF16:      mov.b32        [[R:%hh[0-9]+]], {[[R0]], [[R1]]}
   1140 
   1141 ; CHECK:      st.param.b32    [func_retval0+0], [[R]];
   1142 ; CHECK:      ret
   1143 define <2 x half> @test_fma(<2 x half> %a, <2 x half> %b, <2 x half> %c) #0 {
   1144   %r = call <2 x half> @llvm.fma.f16(<2 x half> %a, <2 x half> %b, <2 x half> %c)
   1145   ret <2 x half> %r
   1146 }
   1147 
   1148 ; CHECK-LABEL: test_fabs(
   1149 ; CHECK:      ld.param.b32    [[A:%hh[0-9]+]], [test_fabs_param_0];
   1150 ; CHECK:      mov.b32         {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[A]]
   1151 ; CHECK-DAG:  cvt.f32.f16     [[AF0:%f[0-9]+]], [[A0]];
   1152 ; CHECK-DAG:  cvt.f32.f16     [[AF1:%f[0-9]+]], [[A1]];
   1153 ; CHECK-DAG:  abs.f32         [[RF0:%f[0-9]+]], [[AF0]];
   1154 ; CHECK-DAG:  abs.f32         [[RF1:%f[0-9]+]], [[AF1]];
   1155 ; CHECK-DAG:  cvt.rn.f16.f32  [[R0:%h[0-9]+]], [[RF0]];
   1156 ; CHECK-DAG:  cvt.rn.f16.f32  [[R1:%h[0-9]+]], [[RF1]];
   1157 ; CHECK:      mov.b32         [[R:%hh[0-9]+]], {[[R0]], [[R1]]}
   1158 ; CHECK:      st.param.b32    [func_retval0+0], [[R]];
   1159 ; CHECK:      ret;
    1160 define <2 x half> @test_fabs(<2 x half> %a) #0 { ; for every prefix the checks above expect the lanes widened to f32, abs.f32 applied, then narrowed with cvt.rn.f16.f32 and repacked
    1161   %r = call <2 x half> @llvm.fabs.f16(<2 x half> %a) ; vector absolute-value intrinsic declared earlier in the file
    1162   ret <2 x half> %r ; the packed pair is returned through one st.param.b32 per the checks
    1163 }
   1164 
   1165 ; CHECK-LABEL: test_minnum(
        ; minnum on <2 x half>. In every RUN configuration both operands are expected to be
        ; unpacked, each lane widened to f32 and combined with min.f32; the two results are
        ; narrowed with cvt.rn and repacked. Lane 0 pairs A0 with B0, lane 1 pairs A1 with B1.
   1166 ; CHECK-DAG:  ld.param.b32    [[A:%hh[0-9]+]], [test_minnum_param_0];
   1167 ; CHECK-DAG:  ld.param.b32    [[B:%hh[0-9]+]], [test_minnum_param_1];
   1168 ; CHECK-DAG:  mov.b32         {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[A]]
   1169 ; CHECK-DAG:  mov.b32         {[[B0:%h[0-9]+]], [[B1:%h[0-9]+]]}, [[B]]
   1170 ; CHECK-DAG:  cvt.f32.f16     [[AF0:%f[0-9]+]], [[A0]];
   1171 ; CHECK-DAG:  cvt.f32.f16     [[AF1:%f[0-9]+]], [[A1]];
   1172 ; CHECK-DAG:  cvt.f32.f16     [[BF0:%f[0-9]+]], [[B0]];
   1173 ; CHECK-DAG:  cvt.f32.f16     [[BF1:%f[0-9]+]], [[B1]];
   1174 ; CHECK-DAG:  min.f32         [[RF0:%f[0-9]+]], [[AF0]], [[BF0]];
   1175 ; CHECK-DAG:  min.f32         [[RF1:%f[0-9]+]], [[AF1]], [[BF1]];
   1176 ; CHECK-DAG:  cvt.rn.f16.f32  [[R0:%h[0-9]+]], [[RF0]];
   1177 ; CHECK-DAG:  cvt.rn.f16.f32  [[R1:%h[0-9]+]], [[RF1]];
   1178 ; CHECK:      mov.b32         [[R:%hh[0-9]+]], {[[R0]], [[R1]]}
   1179 ; CHECK:      st.param.b32    [func_retval0+0], [[R]];
   1180 ; CHECK:      ret;
   1181 define <2 x half> @test_minnum(<2 x half> %a, <2 x half> %b) #0 {
   1182   %r = call <2 x half> @llvm.minnum.f16(<2 x half> %a, <2 x half> %b)
   1183   ret <2 x half> %r
   1184 }
   1185 
   1186 ; CHECK-LABEL: test_maxnum(
        ; maxnum on <2 x half>. In every RUN configuration both operands are expected to be
        ; unpacked, each lane widened to f32 and combined with max.f32; the two results are
        ; narrowed with cvt.rn and repacked. Lane 0 pairs A0 with B0, lane 1 pairs A1 with B1.
   1187 ; CHECK-DAG:  ld.param.b32    [[A:%hh[0-9]+]], [test_maxnum_param_0];
   1188 ; CHECK-DAG:  ld.param.b32    [[B:%hh[0-9]+]], [test_maxnum_param_1];
   1189 ; CHECK-DAG:  mov.b32         {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[A]]
   1190 ; CHECK-DAG:  mov.b32         {[[B0:%h[0-9]+]], [[B1:%h[0-9]+]]}, [[B]]
   1191 ; CHECK-DAG:  cvt.f32.f16     [[AF0:%f[0-9]+]], [[A0]];
   1192 ; CHECK-DAG:  cvt.f32.f16     [[AF1:%f[0-9]+]], [[A1]];
   1193 ; CHECK-DAG:  cvt.f32.f16     [[BF0:%f[0-9]+]], [[B0]];
   1194 ; CHECK-DAG:  cvt.f32.f16     [[BF1:%f[0-9]+]], [[B1]];
   1195 ; CHECK-DAG:  max.f32         [[RF0:%f[0-9]+]], [[AF0]], [[BF0]];
   1196 ; CHECK-DAG:  max.f32         [[RF1:%f[0-9]+]], [[AF1]], [[BF1]];
   1197 ; CHECK-DAG:  cvt.rn.f16.f32  [[R0:%h[0-9]+]], [[RF0]];
   1198 ; CHECK-DAG:  cvt.rn.f16.f32  [[R1:%h[0-9]+]], [[RF1]];
   1199 ; CHECK:      mov.b32         [[R:%hh[0-9]+]], {[[R0]], [[R1]]}
   1200 ; CHECK:      st.param.b32    [func_retval0+0], [[R]];
   1201 ; CHECK:      ret;
   1202 define <2 x half> @test_maxnum(<2 x half> %a, <2 x half> %b) #0 {
   1203   %r = call <2 x half> @llvm.maxnum.f16(<2 x half> %a, <2 x half> %b)
   1204   ret <2 x half> %r
   1205 }
   1206 
   1207 ; CHECK-LABEL: test_copysign(
        ; copysign on <2 x half>. The expected sequence works on 16-bit integer copies of each
        ; lane: the magnitude operand is masked with 32767 (sign bit cleared), the sign operand
        ; with -32768 (sign bit only), and the two are or'ed before being moved back to half
        ; registers and repacked.
   1208 ; CHECK-DAG:  ld.param.b32    [[A:%hh[0-9]+]], [test_copysign_param_0];
   1209 ; CHECK-DAG:  ld.param.b32    [[B:%hh[0-9]+]], [test_copysign_param_1];
   1210 ; CHECK-DAG:  mov.b32         {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[A]]
   1211 ; CHECK-DAG:  mov.b32         {[[B0:%h[0-9]+]], [[B1:%h[0-9]+]]}, [[B]]
   1212 ; CHECK-DAG:  mov.b16         [[AS0:%rs[0-9]+]], [[A0]];
   1213 ; CHECK-DAG:  mov.b16         [[AS1:%rs[0-9]+]], [[A1]];
   1214 ; CHECK-DAG:  mov.b16         [[BS0:%rs[0-9]+]], [[B0]];
   1215 ; CHECK-DAG:  mov.b16         [[BS1:%rs[0-9]+]], [[B1]];
   1216 ; CHECK-DAG:  and.b16         [[AX0:%rs[0-9]+]], [[AS0]], 32767;
   1217 ; CHECK-DAG:  and.b16         [[AX1:%rs[0-9]+]], [[AS1]], 32767;
   1218 ; CHECK-DAG:  and.b16         [[BX0:%rs[0-9]+]], [[BS0]], -32768;
   1219 ; CHECK-DAG:  and.b16         [[BX1:%rs[0-9]+]], [[BS1]], -32768;
   1220 ; CHECK-DAG:  or.b16          [[RS0:%rs[0-9]+]], [[AX0]], [[BX0]];
   1221 ; CHECK-DAG:  or.b16          [[RS1:%rs[0-9]+]], [[AX1]], [[BX1]];
   1222 ; CHECK-DAG:  mov.b16         [[R0:%h[0-9]+]], [[RS0]];
   1223 ; CHECK-DAG:  mov.b16         [[R1:%h[0-9]+]], [[RS1]];
   1224 ; CHECK-DAG:  mov.b32         [[R:%hh[0-9]+]], {[[R0]], [[R1]]}
   1225 ; CHECK:      st.param.b32    [func_retval0+0], [[R]];
   1226 ; CHECK:      ret;
   1227 define <2 x half> @test_copysign(<2 x half> %a, <2 x half> %b) #0 {
   1228   %r = call <2 x half> @llvm.copysign.f16(<2 x half> %a, <2 x half> %b)
   1229   ret <2 x half> %r
   1230 }
   1231 
   1232 ; CHECK-LABEL: test_copysign_f32(
        ; copysign whose sign operand is an fptrunc of a <2 x float> argument. The expected
        ; code takes the sign straight from the 32-bit pattern of each float lane: mask with
        ; -2147483648, shift right by 16, truncate to 16 bits, then or it into the magnitude
        ; lane (which was masked with 32767).
   1233 ; CHECK-DAG:  ld.param.b32    [[A:%hh[0-9]+]], [test_copysign_f32_param_0];
   1234 ; CHECK-DAG:  ld.param.v2.f32 {[[B0:%f[0-9]+]], [[B1:%f[0-9]+]]}, [test_copysign_f32_param_1];
   1235 ; CHECK-DAG:  mov.b32         {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[A]]
   1236 ; CHECK-DAG:  mov.b16         [[AS0:%rs[0-9]+]], [[A0]];
   1237 ; CHECK-DAG:  mov.b16         [[AS1:%rs[0-9]+]], [[A1]];
   1238 ; CHECK-DAG:  mov.b32         [[BI0:%r[0-9]+]], [[B0]];
   1239 ; CHECK-DAG:  mov.b32         [[BI1:%r[0-9]+]], [[B1]];
   1240 ; CHECK-DAG:  and.b16         [[AI0:%rs[0-9]+]], [[AS0]], 32767;
   1241 ; CHECK-DAG:  and.b16         [[AI1:%rs[0-9]+]], [[AS1]], 32767;
   1242 ; CHECK-DAG:  and.b32         [[BX0:%r[0-9]+]], [[BI0]], -2147483648;
   1243 ; CHECK-DAG:  and.b32         [[BX1:%r[0-9]+]], [[BI1]], -2147483648;
   1244 ; CHECK-DAG:  shr.u32         [[BY0:%r[0-9]+]], [[BX0]], 16;
   1245 ; CHECK-DAG:  shr.u32         [[BY1:%r[0-9]+]], [[BX1]], 16;
   1246 ; CHECK-DAG:  cvt.u16.u32     [[BZ0:%rs[0-9]+]], [[BY0]];
   1247 ; CHECK-DAG:  cvt.u16.u32     [[BZ1:%rs[0-9]+]], [[BY1]];
   1248 ; CHECK-DAG:  or.b16          [[RS0:%rs[0-9]+]], [[AI0]], [[BZ0]];
   1249 ; CHECK-DAG:  or.b16          [[RS1:%rs[0-9]+]], [[AI1]], [[BZ1]];
   1250 ; CHECK-DAG:  mov.b16         [[R0:%h[0-9]+]], [[RS0]];
   1251 ; CHECK-DAG:  mov.b16         [[R1:%h[0-9]+]], [[RS1]];
   1252 ; CHECK-DAG:  mov.b32         [[R:%hh[0-9]+]], {[[R0]], [[R1]]}
   1253 ; CHECK:      st.param.b32    [func_retval0+0], [[R]];
   1254 ; CHECK:      ret;
   1255 define <2 x half> @test_copysign_f32(<2 x half> %a, <2 x float> %b) #0 {
   1256   %tb = fptrunc <2 x float> %b to <2 x half>
   1257   %r = call <2 x half> @llvm.copysign.f16(<2 x half> %a, <2 x half> %tb)
   1258   ret <2 x half> %r
   1259 }
   1260 
   1261 ; CHECK-LABEL: test_copysign_f64(
        ; copysign whose sign operand is an fptrunc of a <2 x double> argument. The expected
        ; code takes the sign straight from the 64-bit pattern of each double lane: mask with
        ; -9223372036854775808, shift right by 48, truncate to 16 bits, then or it into the
        ; magnitude lane (which was masked with 32767).
   1262 ; CHECK-DAG:  ld.param.b32    [[A:%hh[0-9]+]], [test_copysign_f64_param_0];
   1263 ; CHECK-DAG:  ld.param.v2.f64 {[[B0:%fd[0-9]+]], [[B1:%fd[0-9]+]]}, [test_copysign_f64_param_1];
   1264 ; CHECK-DAG:  mov.b32         {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[A]]
   1265 ; CHECK-DAG:  mov.b16         [[AS0:%rs[0-9]+]], [[A0]];
   1266 ; CHECK-DAG:  mov.b16         [[AS1:%rs[0-9]+]], [[A1]];
   1267 ; CHECK-DAG:  mov.b64         [[BI0:%rd[0-9]+]], [[B0]];
   1268 ; CHECK-DAG:  mov.b64         [[BI1:%rd[0-9]+]], [[B1]];
   1269 ; CHECK-DAG:  and.b16         [[AI0:%rs[0-9]+]], [[AS0]], 32767;
   1270 ; CHECK-DAG:  and.b16         [[AI1:%rs[0-9]+]], [[AS1]], 32767;
   1271 ; CHECK-DAG:  and.b64         [[BX0:%rd[0-9]+]], [[BI0]], -9223372036854775808;
   1272 ; CHECK-DAG:  and.b64         [[BX1:%rd[0-9]+]], [[BI1]], -9223372036854775808;
   1273 ; CHECK-DAG:  shr.u64         [[BY0:%rd[0-9]+]], [[BX0]], 48;
   1274 ; CHECK-DAG:  shr.u64         [[BY1:%rd[0-9]+]], [[BX1]], 48;
   1275 ; CHECK-DAG:  cvt.u16.u64     [[BZ0:%rs[0-9]+]], [[BY0]];
   1276 ; CHECK-DAG:  cvt.u16.u64     [[BZ1:%rs[0-9]+]], [[BY1]];
   1277 ; CHECK-DAG:  or.b16          [[RS0:%rs[0-9]+]], [[AI0]], [[BZ0]];
   1278 ; CHECK-DAG:  or.b16          [[RS1:%rs[0-9]+]], [[AI1]], [[BZ1]];
   1279 ; CHECK-DAG:  mov.b16         [[R0:%h[0-9]+]], [[RS0]];
   1280 ; CHECK-DAG:  mov.b16         [[R1:%h[0-9]+]], [[RS1]];
   1281 ; CHECK-DAG:  mov.b32         [[R:%hh[0-9]+]], {[[R0]], [[R1]]}
   1282 ; CHECK:      st.param.b32    [func_retval0+0], [[R]];
   1283 ; CHECK:      ret;
   1284 define <2 x half> @test_copysign_f64(<2 x half> %a, <2 x double> %b) #0 {
   1285   %tb = fptrunc <2 x double> %b to <2 x half>
   1286   %r = call <2 x half> @llvm.copysign.f16(<2 x half> %a, <2 x half> %tb)
   1287   ret <2 x half> %r
   1288 }
   1289 
   1290 ; CHECK-LABEL: test_copysign_extended(
        ; copysign on <2 x half> whose result is fpext'ed to <2 x float>. The first part is the
        ; same 16-bit and/or sequence expected for plain copysign; the packed result is then
        ; unpacked again, each lane converted with cvt.f32.f16, and the pair stored as v2.f32.
   1291 ; CHECK-DAG:  ld.param.b32    [[A:%hh[0-9]+]], [test_copysign_extended_param_0];
   1292 ; CHECK-DAG:  ld.param.b32    [[B:%hh[0-9]+]], [test_copysign_extended_param_1];
   1293 ; CHECK-DAG:  mov.b32         {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[A]]
   1294 ; CHECK-DAG:  mov.b32         {[[B0:%h[0-9]+]], [[B1:%h[0-9]+]]}, [[B]]
   1295 ; CHECK-DAG:  mov.b16         [[AS0:%rs[0-9]+]], [[A0]];
   1296 ; CHECK-DAG:  mov.b16         [[AS1:%rs[0-9]+]], [[A1]];
   1297 ; CHECK-DAG:  mov.b16         [[BS0:%rs[0-9]+]], [[B0]];
   1298 ; CHECK-DAG:  mov.b16         [[BS1:%rs[0-9]+]], [[B1]];
   1299 ; CHECK-DAG:  and.b16         [[AX0:%rs[0-9]+]], [[AS0]], 32767;
   1300 ; CHECK-DAG:  and.b16         [[AX1:%rs[0-9]+]], [[AS1]], 32767;
   1301 ; CHECK-DAG:  and.b16         [[BX0:%rs[0-9]+]], [[BS0]], -32768;
   1302 ; CHECK-DAG:  and.b16         [[BX1:%rs[0-9]+]], [[BS1]], -32768;
   1303 ; CHECK-DAG:  or.b16          [[RS0:%rs[0-9]+]], [[AX0]], [[BX0]];
   1304 ; CHECK-DAG:  or.b16          [[RS1:%rs[0-9]+]], [[AX1]], [[BX1]];
   1305 ; CHECK-DAG:  mov.b16         [[R0:%h[0-9]+]], [[RS0]];
   1306 ; CHECK-DAG:  mov.b16         [[R1:%h[0-9]+]], [[RS1]];
   1307 ; CHECK-DAG:  mov.b32         [[R:%hh[0-9]+]], {[[R0]], [[R1]]}
   1308 ; CHECK:      mov.b32         {[[RX0:%h[0-9]+]], [[RX1:%h[0-9]+]]}, [[R]]
   1309 ; CHECK-DAG:  cvt.f32.f16     [[XR0:%f[0-9]+]], [[RX0]];
   1310 ; CHECK-DAG:  cvt.f32.f16     [[XR1:%f[0-9]+]], [[RX1]];
   1311 ; CHECK:      st.param.v2.f32 [func_retval0+0], {[[XR0]], [[XR1]]};
   1312 ; CHECK:      ret;
   1313 define <2 x float> @test_copysign_extended(<2 x half> %a, <2 x half> %b) #0 {
   1314   %r = call <2 x half> @llvm.copysign.f16(<2 x half> %a, <2 x half> %b)
   1315   %xr = fpext <2 x half> %r to <2 x float>
   1316   ret <2 x float> %xr
   1317 }
   1318 
   1319 ; CHECK-LABEL: test_floor(
        ; floor on <2 x half>: the vector is unpacked and each lane is expected to go through
        ; cvt.rmi.f16.f16 before the two results are repacked. Same expectation for all RUN lines.
   1320 ; CHECK:      ld.param.b32    [[A:%hh[0-9]+]], [test_floor_param_0];
   1321 ; CHECK-DAG:  mov.b32         {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[A]];
   1322 ; CHECK-DAG:  cvt.rmi.f16.f16 [[R1:%h[0-9]+]], [[A1]];
   1323 ; CHECK-DAG:  cvt.rmi.f16.f16 [[R0:%h[0-9]+]], [[A0]];
   1324 ; CHECK:      mov.b32         [[R:%hh[0-9]+]], {[[R0]], [[R1]]}
   1325 ; CHECK:      st.param.b32    [func_retval0+0], [[R]];
   1326 ; CHECK:      ret;
   1327 define <2 x half> @test_floor(<2 x half> %a) #0 {
   1328   %r = call <2 x half> @llvm.floor.f16(<2 x half> %a)
   1329   ret <2 x half> %r
   1330 }
   1331 
   1332 ; CHECK-LABEL: test_ceil(
        ; ceil on <2 x half>: the vector is unpacked and each lane is expected to go through
        ; cvt.rpi.f16.f16 before the two results are repacked. Same expectation for all RUN lines.
   1333 ; CHECK:      ld.param.b32    [[A:%hh[0-9]+]], [test_ceil_param_0];
   1334 ; CHECK-DAG:  mov.b32         {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[A]];
   1335 ; CHECK-DAG:  cvt.rpi.f16.f16 [[R1:%h[0-9]+]], [[A1]];
   1336 ; CHECK-DAG:  cvt.rpi.f16.f16 [[R0:%h[0-9]+]], [[A0]];
   1337 ; CHECK:      mov.b32         [[R:%hh[0-9]+]], {[[R0]], [[R1]]}
   1338 ; CHECK:      st.param.b32    [func_retval0+0], [[R]];
   1339 ; CHECK:      ret;
   1340 define <2 x half> @test_ceil(<2 x half> %a) #0 {
   1341   %r = call <2 x half> @llvm.ceil.f16(<2 x half> %a)
   1342   ret <2 x half> %r
   1343 }
   1344 
   1345 ; CHECK-LABEL: test_trunc(
        ; trunc on <2 x half>: the vector is unpacked and each lane is expected to go through
        ; cvt.rzi.f16.f16 before the two results are repacked. Same expectation for all RUN lines.
   1346 ; CHECK:      ld.param.b32    [[A:%hh[0-9]+]], [test_trunc_param_0];
   1347 ; CHECK-DAG:  mov.b32         {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[A]];
   1348 ; CHECK-DAG:  cvt.rzi.f16.f16 [[R1:%h[0-9]+]], [[A1]];
   1349 ; CHECK-DAG:  cvt.rzi.f16.f16 [[R0:%h[0-9]+]], [[A0]];
   1350 ; CHECK:      mov.b32         [[R:%hh[0-9]+]], {[[R0]], [[R1]]}
   1351 ; CHECK:      st.param.b32    [func_retval0+0], [[R]];
   1352 ; CHECK:      ret;
   1353 define <2 x half> @test_trunc(<2 x half> %a) #0 {
   1354   %r = call <2 x half> @llvm.trunc.f16(<2 x half> %a)
   1355   ret <2 x half> %r
   1356 }
   1357 
   1358 ; CHECK-LABEL: test_rint(
        ; rint on <2 x half>: the vector is unpacked and each lane is expected to go through
        ; cvt.rni.f16.f16 before the two results are repacked. Same expectation for all RUN lines.
   1359 ; CHECK:      ld.param.b32    [[A:%hh[0-9]+]], [test_rint_param_0];
   1360 ; CHECK-DAG:  mov.b32         {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[A]];
   1361 ; CHECK-DAG:  cvt.rni.f16.f16 [[R1:%h[0-9]+]], [[A1]];
   1362 ; CHECK-DAG:  cvt.rni.f16.f16 [[R0:%h[0-9]+]], [[A0]];
   1363 ; CHECK:      mov.b32         [[R:%hh[0-9]+]], {[[R0]], [[R1]]}
   1364 ; CHECK:      st.param.b32    [func_retval0+0], [[R]];
   1365 ; CHECK:      ret;
   1366 define <2 x half> @test_rint(<2 x half> %a) #0 {
   1367   %r = call <2 x half> @llvm.rint.f16(<2 x half> %a)
   1368   ret <2 x half> %r
   1369 }
   1370 
   1371 ; CHECK-LABEL: test_nearbyint(
        ; nearbyint on <2 x half>: the vector is unpacked and each lane is expected to go through
        ; cvt.rni.f16.f16 before the two results are repacked. Same expectation for all RUN lines.
   1372 ; CHECK:      ld.param.b32    [[A:%hh[0-9]+]], [test_nearbyint_param_0];
   1373 ; CHECK-DAG:  mov.b32         {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[A]];
   1374 ; CHECK-DAG:  cvt.rni.f16.f16 [[R1:%h[0-9]+]], [[A1]];
   1375 ; CHECK-DAG:  cvt.rni.f16.f16 [[R0:%h[0-9]+]], [[A0]];
   1376 ; CHECK:      mov.b32         [[R:%hh[0-9]+]], {[[R0]], [[R1]]}
   1377 ; CHECK:      st.param.b32    [func_retval0+0], [[R]];
   1378 ; CHECK:      ret;
   1379 define <2 x half> @test_nearbyint(<2 x half> %a) #0 {
   1380   %r = call <2 x half> @llvm.nearbyint.f16(<2 x half> %a)
   1381   ret <2 x half> %r
   1382 }
   1383 
   1384 ; CHECK-LABEL: test_round(
        ; round on <2 x half>: the vector is unpacked and each lane is expected to go through
        ; cvt.rni.f16.f16 before the two results are repacked. Same expectation for all RUN lines.
        ; NOTE(review): this is the same mnemonic the rint and nearbyint tests expect, while
        ; LangRef describes llvm.round as rounding halfway cases away from zero. Confirm the
        ; .rni lowering is intended rather than a backend limitation being pinned by this test.
   1385 ; CHECK:      ld.param.b32    [[A:%hh[0-9]+]], [test_round_param_0];
   1386 ; CHECK-DAG:  mov.b32         {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[A]];
   1387 ; CHECK-DAG:  cvt.rni.f16.f16 [[R1:%h[0-9]+]], [[A1]];
   1388 ; CHECK-DAG:  cvt.rni.f16.f16 [[R0:%h[0-9]+]], [[A0]];
   1389 ; CHECK:      mov.b32         [[R:%hh[0-9]+]], {[[R0]], [[R1]]}
   1390 ; CHECK:      st.param.b32    [func_retval0+0], [[R]];
   1391 ; CHECK:      ret;
   1392 define <2 x half> @test_round(<2 x half> %a) #0 {
   1393   %r = call <2 x half> @llvm.round.f16(<2 x half> %a)
   1394   ret <2 x half> %r
   1395 }
   1396 
   1397 ; CHECK-LABEL: test_fmuladd(
        ; fmuladd on <2 x half>. With native f16 math a single packed fma.rn.f16x2 is expected.
        ; Without it, all three operands are unpacked, every lane is widened to f32, one
        ; fma.rn.f32 is issued per lane, and the results are narrowed and repacked.
        ; Each lane-1 input (A1, B1, C1) gets its own conversion so the second fma is fully
        ; tied to values produced inside this function.
   1398 ; CHECK-DAG:  ld.param.b32    [[A:%hh[0-9]+]], [test_fmuladd_param_0];
   1399 ; CHECK-DAG:  ld.param.b32    [[B:%hh[0-9]+]], [test_fmuladd_param_1];
   1400 ; CHECK-DAG:  ld.param.b32    [[C:%hh[0-9]+]], [test_fmuladd_param_2];
   1401 ;
   1402 ; CHECK-F16:        fma.rn.f16x2   [[R:%hh[0-9]+]], [[A]], [[B]], [[C]];
   1403 ;
   1404 ; CHECK-NOF16-DAG:  mov.b32        {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[A]]
   1405 ; CHECK-NOF16-DAG:  mov.b32        {[[B0:%h[0-9]+]], [[B1:%h[0-9]+]]}, [[B]]
   1406 ; CHECK-NOF16-DAG:  mov.b32        {[[C0:%h[0-9]+]], [[C1:%h[0-9]+]]}, [[C]]
   1407 ; CHECK-NOF16-DAG:  cvt.f32.f16    [[FA0:%f[0-9]+]], [[A0]]
   1408 ; CHECK-NOF16-DAG:  cvt.f32.f16    [[FB0:%f[0-9]+]], [[B0]]
   1409 ; CHECK-NOF16-DAG:  cvt.f32.f16    [[FC0:%f[0-9]+]], [[C0]]
   1410 ; CHECK-NOF16-DAG:  cvt.f32.f16    [[FA1:%f[0-9]+]], [[A1]]
   1411 ; CHECK-NOF16-DAG:  cvt.f32.f16    [[FB1:%f[0-9]+]], [[B1]]
   1412 ; CHECK-NOF16-DAG:  cvt.f32.f16    [[FC1:%f[0-9]+]], [[C1]]
   1413 ; CHECK-NOF16-DAG:  fma.rn.f32     [[FR0:%f[0-9]+]], [[FA0]], [[FB0]], [[FC0]];
   1414 ; CHECK-NOF16-DAG:  fma.rn.f32     [[FR1:%f[0-9]+]], [[FA1]], [[FB1]], [[FC1]];
   1415 ; CHECK-NOF16-DAG:  cvt.rn.f16.f32 [[R0:%h[0-9]+]], [[FR0]]
   1416 ; CHECK-NOF16-DAG:  cvt.rn.f16.f32 [[R1:%h[0-9]+]], [[FR1]]
   1417 ; CHECK-NOF16:      mov.b32        [[R:%hh[0-9]+]], {[[R0]], [[R1]]}
   1418 ;
   1419 ; CHECK:      st.param.b32    [func_retval0+0], [[R]];
   1420 ; CHECK:      ret;
   1421 define <2 x half> @test_fmuladd(<2 x half> %a, <2 x half> %b, <2 x half> %c) #0 {
   1422   %r = call <2 x half> @llvm.fmuladd.f16(<2 x half> %a, <2 x half> %b, <2 x half> %c)
   1423   ret <2 x half> %r
   1424 }
   1425 
   1426 ; CHECK-LABEL: test_shufflevector(
        ; shufflevector with mask <1, 0>: the input is unpacked into two half registers and
        ; repacked with the lanes exchanged. Registers are captured by pattern, so the test
        ; does not depend on the exact virtual register numbers the backend assigns.
   1427 ; CHECK: mov.b32 {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[A:%hh[0-9]+]];
   1428 ; CHECK: mov.b32 [[R:%hh[0-9]+]], {[[A1]], [[A0]]};
   1429 define <2 x half> @test_shufflevector(<2 x half> %a) #0 {
   1430   %s = shufflevector <2 x half> %a, <2 x half> undef, <2 x i32> <i32 1, i32 0>
   1431   ret <2 x half> %s
   1432 }
   1433 
   1434 ; CHECK-LABEL: test_insertelement(
        ; insertelement at index 1: lane 0 of the input is extracted (the other lane lands in
        ; %tmp_hi) and repacked as the low lane next to a second half register.
        ; NOTE(review): the expectations name exact registers (%h1, %h2, %hh1, %hh2); %h1 is
        ; presumably the register holding %x, but no parameter-load check here ties it. Verify.
   1435 ; CHECK: mov.b32 {%h2, %tmp_hi}, %hh1;
   1436 ; CHECK: mov.b32 %hh2, {%h2, %h1};
   1437 define <2 x half> @test_insertelement(<2 x half> %a, half %x) #0 {
   1438   %i = insertelement <2 x half> %a, half %x, i64 1
   1439   ret <2 x half> %i
   1440 }
   1441 
   1442 attributes #0 = { nounwind }
        ; Group #0 is the one attached to every function visible in this part of the file.
        ; Group #1 sets "unsafe-fp-math"="true"; none of the functions in this part of the
        ; file reference it, so any users are presumably earlier in the file. Verify.
   1443 attributes #1 = { "unsafe-fp-math" = "true" }
   1444