; Lowering tests for <2 x half> operations on NVPTX. The same IR is compiled
; in three configurations; the F16 prefix expects native f16x2 instructions,
; the NOF16 prefix expects per-element promotion to f32.
;
; ## Full FP16 support enabled by default.
; RUN: llc < %s -mtriple=nvptx64-nvidia-cuda -mcpu=sm_53 -asm-verbose=false \
; RUN: -O0 -disable-post-ra -disable-fp-elim -verify-machineinstrs \
; RUN: | FileCheck -allow-deprecated-dag-overlap -check-prefixes CHECK,CHECK-F16 %s
; ## FP16 support explicitly disabled.
; RUN: llc < %s -mtriple=nvptx64-nvidia-cuda -mcpu=sm_53 -asm-verbose=false \
; RUN: -O0 -disable-post-ra -disable-fp-elim --nvptx-no-f16-math \
; RUN: -verify-machineinstrs \
; RUN: | FileCheck -allow-deprecated-dag-overlap -check-prefixes CHECK,CHECK-NOF16 %s
; ## FP16 is not supported by hardware.
; RUN: llc < %s -O0 -mtriple=nvptx64-nvidia-cuda -mcpu=sm_52 -asm-verbose=false \
; RUN: -disable-post-ra -disable-fp-elim -verify-machineinstrs \
; RUN: | FileCheck -allow-deprecated-dag-overlap -check-prefixes CHECK,CHECK-NOF16 %s

target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128"

; Returning a constant vector: the pair is materialized as one 32-bit
; immediate. 1073757184 = 0x40003C00, i.e. half 2.0 (0x4000) in the upper
; 16 bits and half 1.0 (0x3C00) in the lower 16 bits.
; The register captures use [0-9]+ so that multi-digit register numbers are
; matched in full.
; CHECK-LABEL: test_ret_const(
; CHECK: mov.u32 [[T:%r[0-9]+]], 1073757184;
; CHECK: mov.b32 [[R:%hh[0-9]+]], [[T]];
; CHECK: st.param.b32 [func_retval0+0], [[R]];
; CHECK-NEXT: ret;
define <2 x half> @test_ret_const() #0 {
  ret <2 x half> <half 1.0, half 2.0>
}

; Extracting element 0: the packed register is split and the first half kept.
; CHECK-LABEL: test_extract_0(
; CHECK: ld.param.b32 [[A:%hh[0-9]+]], [test_extract_0_param_0];
; CHECK: mov.b32 {[[R:%h[0-9]+]], %tmp_hi}, [[A]];
; CHECK: st.param.b16 [func_retval0+0], [[R]];
; CHECK: ret;
define half @test_extract_0(<2 x half> %a) #0 {
  %e = extractelement <2 x half> %a, i32 0
  ret half %e
}

; Extracting element 1: the packed register is split and the second half kept.
; CHECK-LABEL: test_extract_1(
; CHECK: ld.param.b32 [[A:%hh[0-9]+]], [test_extract_1_param_0];
; CHECK: mov.b32 {%tmp_lo, [[R:%h[0-9]+]]}, [[A]];
; CHECK: st.param.b16 [func_retval0+0], [[R]];
; CHECK: ret;
define half @test_extract_1(<2 x half> %a) #0 {
  %e = extractelement <2 x half> %a, i32 1
  ret half %e
}

; Extracting with a runtime index: both halves are unpacked and one of them
; is selected with selp on the predicate (idx == 0).
; CHECK-LABEL: test_extract_i(
; CHECK-DAG: ld.param.b32 [[A:%hh[0-9]+]], [test_extract_i_param_0];
; CHECK-DAG: ld.param.u64 [[IDX:%rd[0-9]+]], [test_extract_i_param_1];
; CHECK-DAG: setp.eq.s64 [[PRED:%p[0-9]+]], [[IDX]], 0;
; CHECK-DAG: mov.b32 {[[E0:%h[0-9]+]], [[E1:%h[0-9]+]]}, [[A]];
; CHECK: selp.b16 [[R:%h[0-9]+]], [[E0]], [[E1]], [[PRED]];
; CHECK: st.param.b16 [func_retval0+0], [[R]];
; CHECK: ret;
define half @test_extract_i(<2 x half> %a, i64 %idx) #0 {
  %e = extractelement <2 x half> %a, i64 %idx
  ret half %e
}

; Vector fadd: a single add.rn.f16x2 with native f16 math; otherwise each
; element is converted to f32, added, and converted back to f16.
; CHECK-LABEL: test_fadd(
; CHECK-DAG: ld.param.b32 [[A:%hh[0-9]+]], [test_fadd_param_0];
; CHECK-DAG: ld.param.b32 [[B:%hh[0-9]+]], [test_fadd_param_1];
;
; CHECK-F16-NEXT: add.rn.f16x2 [[R:%hh[0-9]+]], [[A]], [[B]];
;
; CHECK-NOF16-DAG: mov.b32 {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[A]]
; CHECK-NOF16-DAG: mov.b32 {[[B0:%h[0-9]+]], [[B1:%h[0-9]+]]}, [[B]]
; CHECK-NOF16-DAG: cvt.f32.f16 [[FA0:%f[0-9]+]], [[A0]]
; CHECK-NOF16-DAG: cvt.f32.f16 [[FB0:%f[0-9]+]], [[B0]]
; CHECK-NOF16-DAG: cvt.f32.f16 [[FA1:%f[0-9]+]], [[A1]]
; CHECK-NOF16-DAG: cvt.f32.f16 [[FB1:%f[0-9]+]], [[B1]]
; CHECK-NOF16-DAG: add.rn.f32 [[FR0:%f[0-9]+]], [[FA0]], [[FB0]];
; CHECK-NOF16-DAG: add.rn.f32 [[FR1:%f[0-9]+]], [[FA1]], [[FB1]];
; CHECK-NOF16-DAG: cvt.rn.f16.f32 [[R0:%h[0-9]+]], [[FR0]]
; CHECK-NOF16-DAG: cvt.rn.f16.f32 [[R1:%h[0-9]+]], [[FR1]]
; CHECK-NOF16: mov.b32 [[R:%hh[0-9]+]], {[[R0]], [[R1]]}
;
; CHECK-NEXT: st.param.b32 [func_retval0+0], [[R]];
; CHECK-NEXT: ret;
define <2 x half> @test_fadd(<2 x half> %a, <2 x half> %b) #0 {
  %r = fadd <2 x half> %a, %b
  ret <2 x half> %r
}

; Check that we can lower fadd with immediate arguments.
; Constant as the first operand. With native f16 math the constant pair is
; built from the 32-bit immediate 1073757184 (0x40003C00 = <1.0, 2.0>);
; otherwise the f32 immediates 0f3F800000 (1.0) and 0f40000000 (2.0) are
; folded into the per-element adds.
; CHECK-LABEL: test_fadd_imm_0(
; CHECK-DAG: ld.param.b32 [[A:%hh[0-9]+]], [test_fadd_imm_0_param_0];
;
; CHECK-F16: mov.u32 [[I:%r[0-9]+]], 1073757184;
; CHECK-F16: mov.b32 [[IHH:%hh[0-9]+]], [[I]];
; CHECK-F16: add.rn.f16x2 [[R:%hh[0-9]+]], [[A]], [[IHH]];
;
; CHECK-NOF16-DAG: mov.b32 {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[A]]
; CHECK-NOF16-DAG: cvt.f32.f16 [[FA0:%f[0-9]+]], [[A0]]
; CHECK-NOF16-DAG: cvt.f32.f16 [[FA1:%f[0-9]+]], [[A1]]
; CHECK-NOF16-DAG: add.rn.f32 [[FR0:%f[0-9]+]], [[FA0]], 0f3F800000;
; CHECK-NOF16-DAG: add.rn.f32 [[FR1:%f[0-9]+]], [[FA1]], 0f40000000;
; CHECK-NOF16-DAG: cvt.rn.f16.f32 [[R0:%h[0-9]+]], [[FR0]]
; CHECK-NOF16-DAG: cvt.rn.f16.f32 [[R1:%h[0-9]+]], [[FR1]]
; CHECK-NOF16: mov.b32 [[R:%hh[0-9]+]], {[[R0]], [[R1]]}
;
; CHECK-NEXT: st.param.b32 [func_retval0+0], [[R]];
; CHECK-NEXT: ret;
define <2 x half> @test_fadd_imm_0(<2 x half> %a) #0 {
  %r = fadd <2 x half> <half 1.0, half 2.0>, %a
  ret <2 x half> %r
}

; Constant as the second operand. The parameter is captured as B here, so the
; unpack in the no-f16 path must reference B too; referencing A would reuse
; the capture left over from the previous test, since captures are not scoped
; per label in these runs.
; CHECK-LABEL: test_fadd_imm_1(
; CHECK-DAG: ld.param.b32 [[B:%hh[0-9]+]], [test_fadd_imm_1_param_0];
;
; CHECK-F16: mov.u32 [[I:%r[0-9]+]], 1073757184;
; CHECK-F16: mov.b32 [[IHH:%hh[0-9]+]], [[I]];
; CHECK-F16: add.rn.f16x2 [[R:%hh[0-9]+]], [[B]], [[IHH]];
;
; CHECK-NOF16-DAG: mov.b32 {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[B]]
; CHECK-NOF16-DAG: cvt.f32.f16 [[FA0:%f[0-9]+]], [[A0]]
; CHECK-NOF16-DAG: cvt.f32.f16 [[FA1:%f[0-9]+]], [[A1]]
; CHECK-NOF16-DAG: add.rn.f32 [[FR0:%f[0-9]+]], [[FA0]], 0f3F800000;
; CHECK-NOF16-DAG: add.rn.f32 [[FR1:%f[0-9]+]], [[FA1]], 0f40000000;
; CHECK-NOF16-DAG: cvt.rn.f16.f32 [[R0:%h[0-9]+]], [[FR0]]
; CHECK-NOF16-DAG: cvt.rn.f16.f32 [[R1:%h[0-9]+]], [[FR1]]
; CHECK-NOF16: mov.b32 [[R:%hh[0-9]+]], {[[R0]], [[R1]]}
;
; CHECK-NEXT: st.param.b32 [func_retval0+0], [[R]];
; CHECK-NEXT: ret;
define <2 x half> @test_fadd_imm_1(<2 x half> %a) #0 {
  %r = fadd <2 x half> %a, <half 1.0, half 2.0>
  ret <2 x half> %r
}

; Vector fsub: sub.rn.f16x2 natively, per-element f32 subtraction otherwise.
; CHECK-LABEL: test_fsub(
; CHECK-DAG: ld.param.b32 [[A:%hh[0-9]+]], [test_fsub_param_0];
;
; CHECK-DAG: ld.param.b32 [[B:%hh[0-9]+]], [test_fsub_param_1];
; CHECK-F16-NEXT: sub.rn.f16x2 [[R:%hh[0-9]+]], [[A]], [[B]];
;
; CHECK-NOF16-DAG: mov.b32 {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[A]]
; CHECK-NOF16-DAG: mov.b32 {[[B0:%h[0-9]+]], [[B1:%h[0-9]+]]}, [[B]]
; CHECK-NOF16-DAG: cvt.f32.f16 [[FA0:%f[0-9]+]], [[A0]]
; CHECK-NOF16-DAG: cvt.f32.f16 [[FB0:%f[0-9]+]], [[B0]]
; CHECK-NOF16-DAG: cvt.f32.f16 [[FA1:%f[0-9]+]], [[A1]]
; CHECK-NOF16-DAG: cvt.f32.f16 [[FB1:%f[0-9]+]], [[B1]]
; CHECK-NOF16-DAG: sub.rn.f32 [[FR0:%f[0-9]+]], [[FA0]], [[FB0]];
; CHECK-NOF16-DAG: sub.rn.f32 [[FR1:%f[0-9]+]], [[FA1]], [[FB1]];
; CHECK-NOF16-DAG: cvt.rn.f16.f32 [[R0:%h[0-9]+]], [[FR0]]
; CHECK-NOF16-DAG: cvt.rn.f16.f32 [[R1:%h[0-9]+]], [[FR1]]
; CHECK-NOF16: mov.b32 [[R:%hh[0-9]+]], {[[R0]], [[R1]]}
;
; CHECK-NEXT: st.param.b32 [func_retval0+0], [[R]];
; CHECK-NEXT: ret;
define <2 x half> @test_fsub(<2 x half> %a, <2 x half> %b) #0 {
  %r = fsub <2 x half> %a, %b
  ret <2 x half> %r
}

; Negation written as (0.0 - a): natively a zero pair is materialized and
; subtracted with sub.rn.f16x2; otherwise an f32 zero is subtracted per
; element.
; CHECK-LABEL: test_fneg(
; CHECK-DAG: ld.param.b32 [[A:%hh[0-9]+]], [test_fneg_param_0];
;
; CHECK-F16: mov.u32 [[I0:%r[0-9]+]], 0;
; CHECK-F16: mov.b32 [[IHH0:%hh[0-9]+]], [[I0]];
; CHECK-F16-NEXT: sub.rn.f16x2 [[R:%hh[0-9]+]], [[IHH0]], [[A]];
;
; CHECK-NOF16-DAG: mov.b32 {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[A]]
; CHECK-NOF16-DAG: cvt.f32.f16 [[FA0:%f[0-9]+]], [[A0]]
; CHECK-NOF16-DAG: cvt.f32.f16 [[FA1:%f[0-9]+]], [[A1]]
; CHECK-NOF16-DAG: mov.f32 [[Z:%f[0-9]+]], 0f00000000;
; CHECK-NOF16-DAG: sub.rn.f32 [[FR0:%f[0-9]+]], [[Z]], [[FA0]];
; CHECK-NOF16-DAG: sub.rn.f32 [[FR1:%f[0-9]+]], [[Z]], [[FA1]];
; CHECK-NOF16-DAG: cvt.rn.f16.f32 [[R0:%h[0-9]+]], [[FR0]]
; CHECK-NOF16-DAG: cvt.rn.f16.f32 [[R1:%h[0-9]+]], [[FR1]]
; CHECK-NOF16: mov.b32 [[R:%hh[0-9]+]], {[[R0]], [[R1]]}
;
; CHECK-NEXT: st.param.b32 [func_retval0+0], [[R]];
; CHECK-NEXT: ret;
define <2 x half> @test_fneg(<2 x half> %a) #0 {
  %r = fsub <2 x half> <half 0.0, half 0.0>, %a
  ret <2 x half> %r
}

; Vector fmul: mul.rn.f16x2 natively, per-element f32 multiply otherwise.
; CHECK-LABEL: test_fmul(
; CHECK-DAG: ld.param.b32 [[A:%hh[0-9]+]], [test_fmul_param_0];
; CHECK-DAG: ld.param.b32 [[B:%hh[0-9]+]], [test_fmul_param_1];
; CHECK-F16-NEXT: mul.rn.f16x2 [[R:%hh[0-9]+]], [[A]], [[B]];
;
; CHECK-NOF16-DAG: mov.b32 {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[A]]
; CHECK-NOF16-DAG: mov.b32 {[[B0:%h[0-9]+]], [[B1:%h[0-9]+]]}, [[B]]
; CHECK-NOF16-DAG: cvt.f32.f16 [[FA0:%f[0-9]+]], [[A0]]
; CHECK-NOF16-DAG: cvt.f32.f16 [[FB0:%f[0-9]+]], [[B0]]
; CHECK-NOF16-DAG: cvt.f32.f16 [[FA1:%f[0-9]+]], [[A1]]
; CHECK-NOF16-DAG: cvt.f32.f16 [[FB1:%f[0-9]+]], [[B1]]
; CHECK-NOF16-DAG: mul.rn.f32 [[FR0:%f[0-9]+]], [[FA0]], [[FB0]];
; CHECK-NOF16-DAG: mul.rn.f32 [[FR1:%f[0-9]+]], [[FA1]], [[FB1]];
; CHECK-NOF16-DAG: cvt.rn.f16.f32 [[R0:%h[0-9]+]], [[FR0]]
; CHECK-NOF16-DAG: cvt.rn.f16.f32 [[R1:%h[0-9]+]], [[FR1]]
; CHECK-NOF16: mov.b32 [[R:%hh[0-9]+]], {[[R0]], [[R1]]}
;
; CHECK-NEXT: st.param.b32 [func_retval0+0], [[R]];
; CHECK-NEXT: ret;
define <2 x half> @test_fmul(<2 x half> %a, <2 x half> %b) #0 {
  %r = fmul <2 x half> %a, %b
  ret <2 x half> %r
}

; Vector fdiv: the same per-element f32 division is expected in every
; configuration (the directives below use the common prefix only).
; CHECK-LABEL: test_fdiv(
; CHECK-DAG: ld.param.b32 [[A:%hh[0-9]+]], [test_fdiv_param_0];
; CHECK-DAG: ld.param.b32 [[B:%hh[0-9]+]], [test_fdiv_param_1];
; CHECK-DAG: mov.b32 {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[A]]
; CHECK-DAG: mov.b32 {[[B0:%h[0-9]+]], [[B1:%h[0-9]+]]}, [[B]]
; CHECK-DAG: cvt.f32.f16 [[FA0:%f[0-9]+]], [[A0]];
; CHECK-DAG: cvt.f32.f16 [[FA1:%f[0-9]+]], [[A1]];
; CHECK-DAG: cvt.f32.f16 [[FB0:%f[0-9]+]], [[B0]];
; CHECK-DAG: cvt.f32.f16 [[FB1:%f[0-9]+]], [[B1]];
; CHECK-DAG: div.rn.f32 [[FR0:%f[0-9]+]], [[FA0]], [[FB0]];
; CHECK-DAG: div.rn.f32 [[FR1:%f[0-9]+]], [[FA1]], [[FB1]];
; CHECK-DAG: cvt.rn.f16.f32 [[R0:%h[0-9]+]], [[FR0]];
; CHECK-DAG: cvt.rn.f16.f32 [[R1:%h[0-9]+]], [[FR1]];
; CHECK-NEXT: mov.b32 [[R:%hh[0-9]+]], {[[R0]], [[R1]]}
; CHECK-NEXT: st.param.b32 [func_retval0+0], [[R]];
; CHECK-NEXT: ret;
define <2 x half> @test_fdiv(<2 x half> %a, <2 x half> %b) #0 {
  %r = fdiv <2 x half> %a, %b
  ret <2 x half> %r
}

; CHECK-LABEL: test_frem(
; -- Load two 16x2 inputs and split them into f16 elements
; CHECK-DAG: ld.param.b32 [[A:%hh[0-9]+]], [test_frem_param_0];
; CHECK-DAG: ld.param.b32 [[B:%hh[0-9]+]], [test_frem_param_1];
; -- Split into elements
; CHECK-DAG: mov.b32 {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[A]]
; CHECK-DAG: mov.b32 {[[B0:%h[0-9]+]], [[B1:%h[0-9]+]]}, [[B]]
; -- promote to f32.
; CHECK-DAG: cvt.f32.f16 [[FA0:%f[0-9]+]], [[A0]];
; CHECK-DAG: cvt.f32.f16 [[FB0:%f[0-9]+]], [[B0]];
; CHECK-DAG: cvt.f32.f16 [[FA1:%f[0-9]+]], [[A1]];
; CHECK-DAG: cvt.f32.f16 [[FB1:%f[0-9]+]], [[B1]];
; -- frem(a[0],b[0]).
; CHECK-DAG: div.rn.f32 [[FD0:%f[0-9]+]], [[FA0]], [[FB0]];
; CHECK-DAG: cvt.rmi.f32.f32 [[DI0:%f[0-9]+]], [[FD0]];
; CHECK-DAG: mul.f32 [[RI0:%f[0-9]+]], [[DI0]], [[FB0]];
; CHECK-DAG: sub.f32 [[RF0:%f[0-9]+]], [[FA0]], [[RI0]];
; -- frem(a[1],b[1]).
; CHECK-DAG: div.rn.f32 [[FD1:%f[0-9]+]], [[FA1]], [[FB1]];
; CHECK-DAG: cvt.rmi.f32.f32 [[DI1:%f[0-9]+]], [[FD1]];
; CHECK-DAG: mul.f32 [[RI1:%f[0-9]+]], [[DI1]], [[FB1]];
; CHECK-DAG: sub.f32 [[RF1:%f[0-9]+]], [[FA1]], [[RI1]];
; -- convert back to f16.
; CHECK-DAG: cvt.rn.f16.f32 [[R0:%h[0-9]+]], [[RF0]];
; CHECK-DAG: cvt.rn.f16.f32 [[R1:%h[0-9]+]], [[RF1]];
; -- merge into f16x2 and return it.
251 ; CHECK: mov.b32 [[R:%hh[0-9]+]], {[[R0]], [[R1]]} 252 ; CHECK-NEXT: st.param.b32 [func_retval0+0], [[R]]; 253 ; CHECK-NEXT: ret; 254 define <2 x half> @test_frem(<2 x half> %a, <2 x half> %b) #0 { 255 %r = frem <2 x half> %a, %b 256 ret <2 x half> %r 257 } 258 259 ; CHECK-LABEL: .func test_ldst_v2f16( 260 ; CHECK-DAG: ld.param.u64 %[[A:rd[0-9]+]], [test_ldst_v2f16_param_0]; 261 ; CHECK-DAG: ld.param.u64 %[[B:rd[0-9]+]], [test_ldst_v2f16_param_1]; 262 ; CHECK-DAG: ld.b32 [[E:%hh[0-9]+]], [%[[A]]] 263 ; CHECK: mov.b32 {[[E0:%h[0-9]+]], [[E1:%h[0-9]+]]}, [[E]]; 264 ; CHECK-DAG: st.v2.b16 [%[[B]]], {[[E0]], [[E1]]}; 265 ; CHECK: ret; 266 define void @test_ldst_v2f16(<2 x half>* %a, <2 x half>* %b) { 267 %t1 = load <2 x half>, <2 x half>* %a 268 store <2 x half> %t1, <2 x half>* %b, align 16 269 ret void 270 } 271 272 ; CHECK-LABEL: .func test_ldst_v3f16( 273 ; CHECK-DAG: ld.param.u64 %[[A:rd[0-9]+]], [test_ldst_v3f16_param_0]; 274 ; CHECK-DAG: ld.param.u64 %[[B:rd[0-9]+]], [test_ldst_v3f16_param_1]; 275 ; -- v3 is inconvenient to capture as it's lowered as ld.b64 plus a fair 276 ; number of bit-shifting instructions that may change at LLVM's whim. 277 ; So we only verify that we issue the correct number of writes at the 278 ; correct offsets, but not the values we write. 
279 ; CHECK-DAG: ld.u64 280 ; CHECK-DAG: st.u32 [%[[B]]], 281 ; CHECK-DAG: st.b16 [%[[B]]+4], 282 ; CHECK: ret; 283 define void @test_ldst_v3f16(<3 x half>* %a, <3 x half>* %b) { 284 %t1 = load <3 x half>, <3 x half>* %a 285 store <3 x half> %t1, <3 x half>* %b, align 16 286 ret void 287 } 288 289 ; CHECK-LABEL: .func test_ldst_v4f16( 290 ; CHECK-DAG: ld.param.u64 %[[A:rd[0-9]+]], [test_ldst_v4f16_param_0]; 291 ; CHECK-DAG: ld.param.u64 %[[B:rd[0-9]+]], [test_ldst_v4f16_param_1]; 292 ; CHECK-DAG: ld.v4.b16 {[[E0:%h[0-9]+]], [[E1:%h[0-9]+]], [[E2:%h[0-9]+]], [[E3:%h[0-9]+]]}, [%[[A]]]; 293 ; CHECK-DAG: st.v4.b16 [%[[B]]], {[[E0]], [[E1]], [[E2]], [[E3]]}; 294 ; CHECK: ret; 295 define void @test_ldst_v4f16(<4 x half>* %a, <4 x half>* %b) { 296 %t1 = load <4 x half>, <4 x half>* %a 297 store <4 x half> %t1, <4 x half>* %b, align 16 298 ret void 299 } 300 301 ; CHECK-LABEL: .func test_ldst_v8f16( 302 ; CHECK-DAG: ld.param.u64 %[[A:rd[0-9]+]], [test_ldst_v8f16_param_0]; 303 ; CHECK-DAG: ld.param.u64 %[[B:rd[0-9]+]], [test_ldst_v8f16_param_1]; 304 ; CHECK-DAG: ld.v4.b32 {[[E0:%r[0-9]+]], [[E1:%r[0-9]+]], [[E2:%r[0-9]+]], [[E3:%r[0-9]+]]}, [%[[A]]]; 305 ; CHECK-DAG: st.v4.b32 [%[[B]]], {[[E0]], [[E1]], [[E2]], [[E3]]}; 306 ; CHECK: ret; 307 define void @test_ldst_v8f16(<8 x half>* %a, <8 x half>* %b) { 308 %t1 = load <8 x half>, <8 x half>* %a 309 store <8 x half> %t1, <8 x half>* %b, align 16 310 ret void 311 } 312 313 declare <2 x half> @test_callee(<2 x half> %a, <2 x half> %b) #0 314 315 ; CHECK-LABEL: test_call( 316 ; CHECK-DAG: ld.param.b32 [[A:%hh[0-9]+]], [test_call_param_0]; 317 ; CHECK-DAG: ld.param.b32 [[B:%hh[0-9]+]], [test_call_param_1]; 318 ; CHECK: { 319 ; CHECK-DAG: .param .align 4 .b8 param0[4]; 320 ; CHECK-DAG: .param .align 4 .b8 param1[4]; 321 ; CHECK-DAG: st.param.b32 [param0+0], [[A]]; 322 ; CHECK-DAG: st.param.b32 [param1+0], [[B]]; 323 ; CHECK-DAG: .param .align 4 .b8 retval0[4]; 324 ; CHECK: call.uni (retval0), 325 ; CHECK-NEXT: test_callee, 326 ; 
CHECK: ); 327 ; CHECK-NEXT: ld.param.b32 [[R:%hh[0-9]+]], [retval0+0]; 328 ; CHECK-NEXT: } 329 ; CHECK-NEXT: st.param.b32 [func_retval0+0], [[R]]; 330 ; CHECK-NEXT: ret; 331 define <2 x half> @test_call(<2 x half> %a, <2 x half> %b) #0 { 332 %r = call <2 x half> @test_callee(<2 x half> %a, <2 x half> %b) 333 ret <2 x half> %r 334 } 335 336 ; CHECK-LABEL: test_call_flipped( 337 ; CHECK-DAG: ld.param.b32 [[A:%hh[0-9]+]], [test_call_flipped_param_0]; 338 ; CHECK-DAG: ld.param.b32 [[B:%hh[0-9]+]], [test_call_flipped_param_1]; 339 ; CHECK: { 340 ; CHECK-DAG: .param .align 4 .b8 param0[4]; 341 ; CHECK-DAG: .param .align 4 .b8 param1[4]; 342 ; CHECK-DAG: st.param.b32 [param0+0], [[B]]; 343 ; CHECK-DAG: st.param.b32 [param1+0], [[A]]; 344 ; CHECK-DAG: .param .align 4 .b8 retval0[4]; 345 ; CHECK: call.uni (retval0), 346 ; CHECK-NEXT: test_callee, 347 ; CHECK: ); 348 ; CHECK-NEXT: ld.param.b32 [[R:%hh[0-9]+]], [retval0+0]; 349 ; CHECK-NEXT: } 350 ; CHECK-NEXT: st.param.b32 [func_retval0+0], [[R]]; 351 ; CHECK-NEXT: ret; 352 define <2 x half> @test_call_flipped(<2 x half> %a, <2 x half> %b) #0 { 353 %r = call <2 x half> @test_callee(<2 x half> %b, <2 x half> %a) 354 ret <2 x half> %r 355 } 356 357 ; CHECK-LABEL: test_tailcall_flipped( 358 ; CHECK-DAG: ld.param.b32 [[A:%hh[0-9]+]], [test_tailcall_flipped_param_0]; 359 ; CHECK-DAG: ld.param.b32 [[B:%hh[0-9]+]], [test_tailcall_flipped_param_1]; 360 ; CHECK: { 361 ; CHECK-DAG: .param .align 4 .b8 param0[4]; 362 ; CHECK-DAG: .param .align 4 .b8 param1[4]; 363 ; CHECK-DAG: st.param.b32 [param0+0], [[B]]; 364 ; CHECK-DAG: st.param.b32 [param1+0], [[A]]; 365 ; CHECK-DAG: .param .align 4 .b8 retval0[4]; 366 ; CHECK: call.uni (retval0), 367 ; CHECK-NEXT: test_callee, 368 ; CHECK: ); 369 ; CHECK-NEXT: ld.param.b32 [[R:%hh[0-9]+]], [retval0+0]; 370 ; CHECK-NEXT: } 371 ; CHECK-NEXT: st.param.b32 [func_retval0+0], [[R]]; 372 ; CHECK-NEXT: ret; 373 define <2 x half> @test_tailcall_flipped(<2 x half> %a, <2 x half> %b) #0 { 374 %r = tail 
call <2 x half> @test_callee(<2 x half> %b, <2 x half> %a) 375 ret <2 x half> %r 376 } 377 378 ; CHECK-LABEL: test_select( 379 ; CHECK-DAG: ld.param.b32 [[A:%hh[0-9]+]], [test_select_param_0]; 380 ; CHECK-DAG: ld.param.b32 [[B:%hh[0-9]+]], [test_select_param_1]; 381 ; CHECK-DAG: ld.param.u8 [[C:%rs[0-9]+]], [test_select_param_2] 382 ; CHECK-DAG: setp.eq.b16 [[PRED:%p[0-9]+]], %rs{{.*}}, 1; 383 ; CHECK-NEXT: selp.b32 [[R:%hh[0-9]+]], [[A]], [[B]], [[PRED]]; 384 ; CHECK-NEXT: st.param.b32 [func_retval0+0], [[R]]; 385 ; CHECK-NEXT: ret; 386 define <2 x half> @test_select(<2 x half> %a, <2 x half> %b, i1 zeroext %c) #0 { 387 %r = select i1 %c, <2 x half> %a, <2 x half> %b 388 ret <2 x half> %r 389 } 390 391 ; CHECK-LABEL: test_select_cc( 392 ; CHECK-DAG: ld.param.b32 [[A:%hh[0-9]+]], [test_select_cc_param_0]; 393 ; CHECK-DAG: ld.param.b32 [[B:%hh[0-9]+]], [test_select_cc_param_1]; 394 ; CHECK-DAG: ld.param.b32 [[C:%hh[0-9]+]], [test_select_cc_param_2]; 395 ; CHECK-DAG: ld.param.b32 [[D:%hh[0-9]+]], [test_select_cc_param_3]; 396 ; 397 ; CHECK-F16: setp.neu.f16x2 [[P0:%p[0-9]+]]|[[P1:%p[0-9]+]], [[C]], [[D]] 398 ; 399 ; CHECK-NOF16-DAG: mov.b32 {[[C0:%h[0-9]+]], [[C1:%h[0-9]+]]}, [[C]] 400 ; CHECK-NOF16-DAG: mov.b32 {[[D0:%h[0-9]+]], [[D1:%h[0-9]+]]}, [[D]] 401 ; CHECK-NOF16-DAG: cvt.f32.f16 [[DF0:%f[0-9]+]], [[D0]]; 402 ; CHECK-NOF16-DAG: cvt.f32.f16 [[CF0:%f[0-9]+]], [[C0]]; 403 ; CHECK-NOF16-DAG: cvt.f32.f16 [[DF1:%f[0-9]+]], [[D1]]; 404 ; CHECK-NOF16-DAG: cvt.f32.f16 [[CF1:%f[0-9]+]], [[C1]]; 405 ; CHECK-NOF16-DAG: setp.neu.f32 [[P0:%p[0-9]+]], [[CF0]], [[DF0]] 406 ; CHECK-NOF16-DAG: setp.neu.f32 [[P1:%p[0-9]+]], [[CF1]], [[DF1]] 407 ; 408 ; CHECK-DAG: mov.b32 {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[A]] 409 ; CHECK-DAG: mov.b32 {[[B0:%h[0-9]+]], [[B1:%h[0-9]+]]}, [[B]] 410 ; CHECK-DAG: selp.b16 [[R0:%h[0-9]+]], [[A0]], [[B0]], [[P0]]; 411 ; CHECK-DAG: selp.b16 [[R1:%h[0-9]+]], [[A1]], [[B1]], [[P1]]; 412 ; CHECK: mov.b32 [[R:%hh[0-9]+]], {[[R0]], [[R1]]} 413 ; 
CHECK-NEXT: st.param.b32 [func_retval0+0], [[R]]; 414 ; CHECK-NEXT: ret; 415 define <2 x half> @test_select_cc(<2 x half> %a, <2 x half> %b, <2 x half> %c, <2 x half> %d) #0 { 416 %cc = fcmp une <2 x half> %c, %d 417 %r = select <2 x i1> %cc, <2 x half> %a, <2 x half> %b 418 ret <2 x half> %r 419 } 420 421 ; CHECK-LABEL: test_select_cc_f32_f16( 422 ; CHECK-DAG: ld.param.v2.f32 {[[A0:%f[0-9]+]], [[A1:%f[0-9]+]]}, [test_select_cc_f32_f16_param_0]; 423 ; CHECK-DAG: ld.param.v2.f32 {[[B0:%f[0-9]+]], [[B1:%f[0-9]+]]}, [test_select_cc_f32_f16_param_1]; 424 ; CHECK-DAG: ld.param.b32 [[C:%hh[0-9]+]], [test_select_cc_f32_f16_param_2]; 425 ; CHECK-DAG: ld.param.b32 [[D:%hh[0-9]+]], [test_select_cc_f32_f16_param_3]; 426 ; 427 ; CHECK-F16: setp.neu.f16x2 [[P0:%p[0-9]+]]|[[P1:%p[0-9]+]], [[C]], [[D]] 428 ; CHECK-NOF16-DAG: mov.b32 {[[C0:%h[0-9]+]], [[C1:%h[0-9]+]]}, [[C]] 429 ; CHECK-NOF16-DAG: mov.b32 {[[D0:%h[0-9]+]], [[D1:%h[0-9]+]]}, [[D]] 430 ; CHECK-NOF16-DAG: cvt.f32.f16 [[DF0:%f[0-9]+]], [[D0]]; 431 ; CHECK-NOF16-DAG: cvt.f32.f16 [[CF0:%f[0-9]+]], [[C0]]; 432 ; CHECK-NOF16-DAG: cvt.f32.f16 [[DF1:%f[0-9]+]], [[D1]]; 433 ; CHECK-NOF16-DAG: cvt.f32.f16 [[CF1:%f[0-9]+]], [[C1]]; 434 ; CHECK-NOF16-DAG: setp.neu.f32 [[P0:%p[0-9]+]], [[CF0]], [[DF0]] 435 ; CHECK-NOF16-DAG: setp.neu.f32 [[P1:%p[0-9]+]], [[CF1]], [[DF1]] 436 ; 437 ; CHECK-DAG: selp.f32 [[R0:%f[0-9]+]], [[A0]], [[B0]], [[P0]]; 438 ; CHECK-DAG: selp.f32 [[R1:%f[0-9]+]], [[A1]], [[B1]], [[P1]]; 439 ; CHECK-NEXT: st.param.v2.f32 [func_retval0+0], {[[R0]], [[R1]]}; 440 ; CHECK-NEXT: ret; 441 define <2 x float> @test_select_cc_f32_f16(<2 x float> %a, <2 x float> %b, 442 <2 x half> %c, <2 x half> %d) #0 { 443 %cc = fcmp une <2 x half> %c, %d 444 %r = select <2 x i1> %cc, <2 x float> %a, <2 x float> %b 445 ret <2 x float> %r 446 } 447 448 ; CHECK-LABEL: test_select_cc_f16_f32( 449 ; CHECK-DAG: ld.param.b32 [[A:%hh[0-9]+]], [test_select_cc_f16_f32_param_0]; 450 ; CHECK-DAG: ld.param.b32 [[B:%hh[0-9]+]], 
[test_select_cc_f16_f32_param_1]; 451 ; CHECK-DAG: ld.param.v2.f32 {[[C0:%f[0-9]+]], [[C1:%f[0-9]+]]}, [test_select_cc_f16_f32_param_2]; 452 ; CHECK-DAG: ld.param.v2.f32 {[[D0:%f[0-9]+]], [[D1:%f[0-9]+]]}, [test_select_cc_f16_f32_param_3]; 453 ; CHECK-DAG: setp.neu.f32 [[P0:%p[0-9]+]], [[C0]], [[D0]] 454 ; CHECK-DAG: setp.neu.f32 [[P1:%p[0-9]+]], [[C1]], [[D1]] 455 ; CHECK-DAG: mov.b32 {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[A]] 456 ; CHECK-DAG: mov.b32 {[[B0:%h[0-9]+]], [[B1:%h[0-9]+]]}, [[B]] 457 ; CHECK-DAG: selp.b16 [[R0:%h[0-9]+]], [[A0]], [[B0]], [[P0]]; 458 ; CHECK-DAG: selp.b16 [[R1:%h[0-9]+]], [[A1]], [[B1]], [[P1]]; 459 ; CHECK: mov.b32 [[R:%hh[0-9]+]], {[[R0]], [[R1]]} 460 ; CHECK-NEXT: st.param.b32 [func_retval0+0], [[R]]; 461 ; CHECK-NEXT: ret; 462 define <2 x half> @test_select_cc_f16_f32(<2 x half> %a, <2 x half> %b, 463 <2 x float> %c, <2 x float> %d) #0 { 464 %cc = fcmp une <2 x float> %c, %d 465 %r = select <2 x i1> %cc, <2 x half> %a, <2 x half> %b 466 ret <2 x half> %r 467 } 468 469 ; CHECK-LABEL: test_fcmp_une( 470 ; CHECK-DAG: ld.param.b32 [[A:%hh[0-9]+]], [test_fcmp_une_param_0]; 471 ; CHECK-DAG: ld.param.b32 [[B:%hh[0-9]+]], [test_fcmp_une_param_1]; 472 ; CHECK-F16: setp.neu.f16x2 [[P0:%p[0-9]+]]|[[P1:%p[0-9]+]], [[A]], [[B]] 473 ; CHECK-NOF16-DAG: mov.b32 {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[A]] 474 ; CHECK-NOF16-DAG: mov.b32 {[[B0:%h[0-9]+]], [[B1:%h[0-9]+]]}, [[B]] 475 ; CHECK-NOF16-DAG: cvt.f32.f16 [[FA0:%f[0-9]+]], [[A0]] 476 ; CHECK-NOF16-DAG: cvt.f32.f16 [[FB0:%f[0-9]+]], [[B0]] 477 ; CHECK-NOF16-DAG: cvt.f32.f16 [[FA1:%f[0-9]+]], [[A1]] 478 ; CHECK-NOF16-DAG: cvt.f32.f16 [[FB1:%f[0-9]+]], [[B1]] 479 ; CHECK-NOF16-DAG: setp.neu.f32 [[P0:%p[0-9]+]], [[FA0]], [[FB0]] 480 ; CHECK-NOF16-DAG: setp.neu.f32 [[P1:%p[0-9]+]], [[FA1]], [[FB1]] 481 ; CHECK-DAG: selp.u16 [[R0:%rs[0-9]+]], -1, 0, [[P0]]; 482 ; CHECK-DAG: selp.u16 [[R1:%rs[0-9]+]], -1, 0, [[P1]]; 483 ; CHECK-NEXT: st.param.v2.b8 [func_retval0+0], {[[R0]], [[R1]]}; 484 ; CHECK-NEXT: 
ret; 485 define <2 x i1> @test_fcmp_une(<2 x half> %a, <2 x half> %b) #0 { 486 %r = fcmp une <2 x half> %a, %b 487 ret <2 x i1> %r 488 } 489 490 ; CHECK-LABEL: test_fcmp_ueq( 491 ; CHECK-DAG: ld.param.b32 [[A:%hh[0-9]+]], [test_fcmp_ueq_param_0]; 492 ; CHECK-DAG: ld.param.b32 [[B:%hh[0-9]+]], [test_fcmp_ueq_param_1]; 493 ; CHECK-F16: setp.equ.f16x2 [[P0:%p[0-9]+]]|[[P1:%p[0-9]+]], [[A]], [[B]] 494 ; CHECK-NOF16-DAG: mov.b32 {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[A]] 495 ; CHECK-NOF16-DAG: mov.b32 {[[B0:%h[0-9]+]], [[B1:%h[0-9]+]]}, [[B]] 496 ; CHECK-NOF16-DAG: cvt.f32.f16 [[FA0:%f[0-9]+]], [[A0]] 497 ; CHECK-NOF16-DAG: cvt.f32.f16 [[FB0:%f[0-9]+]], [[B0]] 498 ; CHECK-NOF16-DAG: cvt.f32.f16 [[FA1:%f[0-9]+]], [[A1]] 499 ; CHECK-NOF16-DAG: cvt.f32.f16 [[FB1:%f[0-9]+]], [[B1]] 500 ; CHECK-NOF16-DAG: setp.equ.f32 [[P0:%p[0-9]+]], [[FA0]], [[FB0]] 501 ; CHECK-NOF16-DAG: setp.equ.f32 [[P1:%p[0-9]+]], [[FA1]], [[FB1]] 502 ; CHECK-DAG: selp.u16 [[R0:%rs[0-9]+]], -1, 0, [[P0]]; 503 ; CHECK-DAG: selp.u16 [[R1:%rs[0-9]+]], -1, 0, [[P1]]; 504 ; CHECK-NEXT: st.param.v2.b8 [func_retval0+0], {[[R0]], [[R1]]}; 505 ; CHECK-NEXT: ret; 506 define <2 x i1> @test_fcmp_ueq(<2 x half> %a, <2 x half> %b) #0 { 507 %r = fcmp ueq <2 x half> %a, %b 508 ret <2 x i1> %r 509 } 510 511 ; CHECK-LABEL: test_fcmp_ugt( 512 ; CHECK-DAG: ld.param.b32 [[A:%hh[0-9]+]], [test_fcmp_ugt_param_0]; 513 ; CHECK-DAG: ld.param.b32 [[B:%hh[0-9]+]], [test_fcmp_ugt_param_1]; 514 ; CHECK-F16: setp.gtu.f16x2 [[P0:%p[0-9]+]]|[[P1:%p[0-9]+]], [[A]], [[B]] 515 ; CHECK-NOF16-DAG: mov.b32 {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[A]] 516 ; CHECK-NOF16-DAG: mov.b32 {[[B0:%h[0-9]+]], [[B1:%h[0-9]+]]}, [[B]] 517 ; CHECK-NOF16-DAG: cvt.f32.f16 [[FA0:%f[0-9]+]], [[A0]] 518 ; CHECK-NOF16-DAG: cvt.f32.f16 [[FB0:%f[0-9]+]], [[B0]] 519 ; CHECK-NOF16-DAG: cvt.f32.f16 [[FA1:%f[0-9]+]], [[A1]] 520 ; CHECK-NOF16-DAG: cvt.f32.f16 [[FB1:%f[0-9]+]], [[B1]] 521 ; CHECK-NOF16-DAG: setp.gtu.f32 [[P0:%p[0-9]+]], [[FA0]], [[FB0]] 522 ; 
CHECK-NOF16-DAG: setp.gtu.f32 [[P1:%p[0-9]+]], [[FA1]], [[FB1]] 523 ; CHECK-DAG: selp.u16 [[R0:%rs[0-9]+]], -1, 0, [[P0]]; 524 ; CHECK-DAG: selp.u16 [[R1:%rs[0-9]+]], -1, 0, [[P1]]; 525 ; CHECK-NEXT: st.param.v2.b8 [func_retval0+0], {[[R0]], [[R1]]}; 526 ; CHECK-NEXT: ret; 527 define <2 x i1> @test_fcmp_ugt(<2 x half> %a, <2 x half> %b) #0 { 528 %r = fcmp ugt <2 x half> %a, %b 529 ret <2 x i1> %r 530 } 531 532 ; CHECK-LABEL: test_fcmp_uge( 533 ; CHECK-DAG: ld.param.b32 [[A:%hh[0-9]+]], [test_fcmp_uge_param_0]; 534 ; CHECK-DAG: ld.param.b32 [[B:%hh[0-9]+]], [test_fcmp_uge_param_1]; 535 ; CHECK-F16: setp.geu.f16x2 [[P0:%p[0-9]+]]|[[P1:%p[0-9]+]], [[A]], [[B]] 536 ; CHECK-NOF16-DAG: mov.b32 {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[A]] 537 ; CHECK-NOF16-DAG: mov.b32 {[[B0:%h[0-9]+]], [[B1:%h[0-9]+]]}, [[B]] 538 ; CHECK-NOF16-DAG: cvt.f32.f16 [[FA0:%f[0-9]+]], [[A0]] 539 ; CHECK-NOF16-DAG: cvt.f32.f16 [[FB0:%f[0-9]+]], [[B0]] 540 ; CHECK-NOF16-DAG: cvt.f32.f16 [[FA1:%f[0-9]+]], [[A1]] 541 ; CHECK-NOF16-DAG: cvt.f32.f16 [[FB1:%f[0-9]+]], [[B1]] 542 ; CHECK-NOF16-DAG: setp.geu.f32 [[P0:%p[0-9]+]], [[FA0]], [[FB0]] 543 ; CHECK-NOF16-DAG: setp.geu.f32 [[P1:%p[0-9]+]], [[FA1]], [[FB1]] 544 ; CHECK-DAG: selp.u16 [[R0:%rs[0-9]+]], -1, 0, [[P0]]; 545 ; CHECK-DAG: selp.u16 [[R1:%rs[0-9]+]], -1, 0, [[P1]]; 546 ; CHECK-NEXT: st.param.v2.b8 [func_retval0+0], {[[R0]], [[R1]]}; 547 ; CHECK-NEXT: ret; 548 define <2 x i1> @test_fcmp_uge(<2 x half> %a, <2 x half> %b) #0 { 549 %r = fcmp uge <2 x half> %a, %b 550 ret <2 x i1> %r 551 } 552 553 ; CHECK-LABEL: test_fcmp_ult( 554 ; CHECK-DAG: ld.param.b32 [[A:%hh[0-9]+]], [test_fcmp_ult_param_0]; 555 ; CHECK-DAG: ld.param.b32 [[B:%hh[0-9]+]], [test_fcmp_ult_param_1]; 556 ; CHECK-F16: setp.ltu.f16x2 [[P0:%p[0-9]+]]|[[P1:%p[0-9]+]], [[A]], [[B]] 557 ; CHECK-NOF16-DAG: mov.b32 {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[A]] 558 ; CHECK-NOF16-DAG: mov.b32 {[[B0:%h[0-9]+]], [[B1:%h[0-9]+]]}, [[B]] 559 ; CHECK-NOF16-DAG: cvt.f32.f16 [[FA0:%f[0-9]+]], [[A0]] 
560 ; CHECK-NOF16-DAG: cvt.f32.f16 [[FB0:%f[0-9]+]], [[B0]] 561 ; CHECK-NOF16-DAG: cvt.f32.f16 [[FA1:%f[0-9]+]], [[A1]] 562 ; CHECK-NOF16-DAG: cvt.f32.f16 [[FB1:%f[0-9]+]], [[B1]] 563 ; CHECK-NOF16-DAG: setp.ltu.f32 [[P0:%p[0-9]+]], [[FA0]], [[FB0]] 564 ; CHECK-NOF16-DAG: setp.ltu.f32 [[P1:%p[0-9]+]], [[FA1]], [[FB1]] 565 ; CHECK-DAG: selp.u16 [[R0:%rs[0-9]+]], -1, 0, [[P0]]; 566 ; CHECK-DAG: selp.u16 [[R1:%rs[0-9]+]], -1, 0, [[P1]]; 567 ; CHECK-NEXT: st.param.v2.b8 [func_retval0+0], {[[R0]], [[R1]]}; 568 ; CHECK-NEXT: ret; 569 define <2 x i1> @test_fcmp_ult(<2 x half> %a, <2 x half> %b) #0 { 570 %r = fcmp ult <2 x half> %a, %b 571 ret <2 x i1> %r 572 } 573 574 ; CHECK-LABEL: test_fcmp_ule( 575 ; CHECK-DAG: ld.param.b32 [[A:%hh[0-9]+]], [test_fcmp_ule_param_0]; 576 ; CHECK-DAG: ld.param.b32 [[B:%hh[0-9]+]], [test_fcmp_ule_param_1]; 577 ; CHECK-F16: setp.leu.f16x2 [[P0:%p[0-9]+]]|[[P1:%p[0-9]+]], [[A]], [[B]] 578 ; CHECK-NOF16-DAG: mov.b32 {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[A]] 579 ; CHECK-NOF16-DAG: mov.b32 {[[B0:%h[0-9]+]], [[B1:%h[0-9]+]]}, [[B]] 580 ; CHECK-NOF16-DAG: cvt.f32.f16 [[FA0:%f[0-9]+]], [[A0]] 581 ; CHECK-NOF16-DAG: cvt.f32.f16 [[FB0:%f[0-9]+]], [[B0]] 582 ; CHECK-NOF16-DAG: cvt.f32.f16 [[FA1:%f[0-9]+]], [[A1]] 583 ; CHECK-NOF16-DAG: cvt.f32.f16 [[FB1:%f[0-9]+]], [[B1]] 584 ; CHECK-NOF16-DAG: setp.leu.f32 [[P0:%p[0-9]+]], [[FA0]], [[FB0]] 585 ; CHECK-NOF16-DAG: setp.leu.f32 [[P1:%p[0-9]+]], [[FA1]], [[FB1]] 586 ; CHECK-DAG: selp.u16 [[R0:%rs[0-9]+]], -1, 0, [[P0]]; 587 ; CHECK-DAG: selp.u16 [[R1:%rs[0-9]+]], -1, 0, [[P1]]; 588 ; CHECK-NEXT: st.param.v2.b8 [func_retval0+0], {[[R0]], [[R1]]}; 589 ; CHECK-NEXT: ret; 590 define <2 x i1> @test_fcmp_ule(<2 x half> %a, <2 x half> %b) #0 { 591 %r = fcmp ule <2 x half> %a, %b 592 ret <2 x i1> %r 593 } 594 595 596 ; CHECK-LABEL: test_fcmp_uno( 597 ; CHECK-DAG: ld.param.b32 [[A:%hh[0-9]+]], [test_fcmp_uno_param_0]; 598 ; CHECK-DAG: ld.param.b32 [[B:%hh[0-9]+]], [test_fcmp_uno_param_1]; 599 ; CHECK-F16: 
setp.nan.f16x2 [[P0:%p[0-9]+]]|[[P1:%p[0-9]+]], [[A]], [[B]]
; CHECK-NOF16-DAG: mov.b32 {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[A]]
; CHECK-NOF16-DAG: mov.b32 {[[B0:%h[0-9]+]], [[B1:%h[0-9]+]]}, [[B]]
; CHECK-NOF16-DAG: cvt.f32.f16 [[FA0:%f[0-9]+]], [[A0]]
; CHECK-NOF16-DAG: cvt.f32.f16 [[FB0:%f[0-9]+]], [[B0]]
; CHECK-NOF16-DAG: cvt.f32.f16 [[FA1:%f[0-9]+]], [[A1]]
; CHECK-NOF16-DAG: cvt.f32.f16 [[FB1:%f[0-9]+]], [[B1]]
; CHECK-NOF16-DAG: setp.nan.f32 [[P0:%p[0-9]+]], [[FA0]], [[FB0]]
; CHECK-NOF16-DAG: setp.nan.f32 [[P1:%p[0-9]+]], [[FA1]], [[FB1]]
; CHECK-DAG: selp.u16 [[R0:%rs[0-9]+]], -1, 0, [[P0]];
; CHECK-DAG: selp.u16 [[R1:%rs[0-9]+]], -1, 0, [[P1]];
; CHECK-NEXT: st.param.v2.b8 [func_retval0+0], {[[R0]], [[R1]]};
; CHECK-NEXT: ret;
define <2 x i1> @test_fcmp_uno(<2 x half> %a, <2 x half> %b) #0 {
  %r = fcmp uno <2 x half> %a, %b
  ret <2 x i1> %r
}

; fcmp one on <2 x half>. With f16 math a single setp.ne.f16x2 yields both lane
; predicates; without it each lane is widened to f32 and compared separately.
; CHECK-LABEL: test_fcmp_one(
; CHECK-DAG: ld.param.b32 [[A:%hh[0-9]+]], [test_fcmp_one_param_0];
; CHECK-DAG: ld.param.b32 [[B:%hh[0-9]+]], [test_fcmp_one_param_1];
; CHECK-F16: setp.ne.f16x2 [[P0:%p[0-9]+]]|[[P1:%p[0-9]+]], [[A]], [[B]]
; CHECK-NOF16-DAG: mov.b32 {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[A]]
; CHECK-NOF16-DAG: mov.b32 {[[B0:%h[0-9]+]], [[B1:%h[0-9]+]]}, [[B]]
; CHECK-NOF16-DAG: cvt.f32.f16 [[FA0:%f[0-9]+]], [[A0]]
; CHECK-NOF16-DAG: cvt.f32.f16 [[FB0:%f[0-9]+]], [[B0]]
; CHECK-NOF16-DAG: cvt.f32.f16 [[FA1:%f[0-9]+]], [[A1]]
; CHECK-NOF16-DAG: cvt.f32.f16 [[FB1:%f[0-9]+]], [[B1]]
; CHECK-NOF16-DAG: setp.ne.f32 [[P0:%p[0-9]+]], [[FA0]], [[FB0]]
; CHECK-NOF16-DAG: setp.ne.f32 [[P1:%p[0-9]+]], [[FA1]], [[FB1]]
; CHECK-DAG: selp.u16 [[R0:%rs[0-9]+]], -1, 0, [[P0]];
; CHECK-DAG: selp.u16 [[R1:%rs[0-9]+]], -1, 0, [[P1]];
; CHECK-NEXT: st.param.v2.b8 [func_retval0+0], {[[R0]], [[R1]]};
; CHECK-NEXT: ret;
define <2 x i1> @test_fcmp_one(<2 x half> %a, <2 x half> %b) #0 {
  %r = fcmp one <2 x half> %a, %b
  ret <2 x i1> %r
}

; fcmp oeq on <2 x half>: setp.eq.f16x2 with f16 math, per-lane setp.eq.f32
; without.
; CHECK-LABEL: test_fcmp_oeq(
; CHECK-DAG: ld.param.b32 [[A:%hh[0-9]+]], [test_fcmp_oeq_param_0];
; CHECK-DAG: ld.param.b32 [[B:%hh[0-9]+]], [test_fcmp_oeq_param_1];
; CHECK-F16: setp.eq.f16x2 [[P0:%p[0-9]+]]|[[P1:%p[0-9]+]], [[A]], [[B]]
; CHECK-NOF16-DAG: mov.b32 {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[A]]
; CHECK-NOF16-DAG: mov.b32 {[[B0:%h[0-9]+]], [[B1:%h[0-9]+]]}, [[B]]
; CHECK-NOF16-DAG: cvt.f32.f16 [[FA0:%f[0-9]+]], [[A0]]
; CHECK-NOF16-DAG: cvt.f32.f16 [[FB0:%f[0-9]+]], [[B0]]
; CHECK-NOF16-DAG: cvt.f32.f16 [[FA1:%f[0-9]+]], [[A1]]
; CHECK-NOF16-DAG: cvt.f32.f16 [[FB1:%f[0-9]+]], [[B1]]
; CHECK-NOF16-DAG: setp.eq.f32 [[P0:%p[0-9]+]], [[FA0]], [[FB0]]
; CHECK-NOF16-DAG: setp.eq.f32 [[P1:%p[0-9]+]], [[FA1]], [[FB1]]
; CHECK-DAG: selp.u16 [[R0:%rs[0-9]+]], -1, 0, [[P0]];
; CHECK-DAG: selp.u16 [[R1:%rs[0-9]+]], -1, 0, [[P1]];
; CHECK-NEXT: st.param.v2.b8 [func_retval0+0], {[[R0]], [[R1]]};
; CHECK-NEXT: ret;
define <2 x i1> @test_fcmp_oeq(<2 x half> %a, <2 x half> %b) #0 {
  %r = fcmp oeq <2 x half> %a, %b
  ret <2 x i1> %r
}

; fcmp ogt on <2 x half>: setp.gt.f16x2 with f16 math, per-lane setp.gt.f32
; without.
; CHECK-LABEL: test_fcmp_ogt(
; CHECK-DAG: ld.param.b32 [[A:%hh[0-9]+]], [test_fcmp_ogt_param_0];
; CHECK-DAG: ld.param.b32 [[B:%hh[0-9]+]], [test_fcmp_ogt_param_1];
; CHECK-F16: setp.gt.f16x2 [[P0:%p[0-9]+]]|[[P1:%p[0-9]+]], [[A]], [[B]]
; CHECK-NOF16-DAG: mov.b32 {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[A]]
; CHECK-NOF16-DAG: mov.b32 {[[B0:%h[0-9]+]], [[B1:%h[0-9]+]]}, [[B]]
; CHECK-NOF16-DAG: cvt.f32.f16 [[FA0:%f[0-9]+]], [[A0]]
; CHECK-NOF16-DAG: cvt.f32.f16 [[FB0:%f[0-9]+]], [[B0]]
; CHECK-NOF16-DAG: cvt.f32.f16 [[FA1:%f[0-9]+]], [[A1]]
; CHECK-NOF16-DAG: cvt.f32.f16 [[FB1:%f[0-9]+]], [[B1]]
; CHECK-NOF16-DAG: setp.gt.f32 [[P0:%p[0-9]+]], [[FA0]], [[FB0]]
; CHECK-NOF16-DAG: setp.gt.f32 [[P1:%p[0-9]+]], [[FA1]], [[FB1]]
; CHECK-DAG: selp.u16 [[R0:%rs[0-9]+]], -1, 0, [[P0]];
; CHECK-DAG: selp.u16 [[R1:%rs[0-9]+]], -1, 0, [[P1]];
; CHECK-NEXT: st.param.v2.b8 [func_retval0+0], {[[R0]], [[R1]]};
; CHECK-NEXT: ret;
define <2 x i1> @test_fcmp_ogt(<2 x half> %a, <2 x half> %b) #0 {
  %r = fcmp ogt <2 x half> %a, %b
  ret <2 x i1> %r
}

; fcmp oge on <2 x half>: setp.ge.f16x2 with f16 math, per-lane setp.ge.f32
; without.
; CHECK-LABEL: test_fcmp_oge(
; CHECK-DAG: ld.param.b32 [[A:%hh[0-9]+]], [test_fcmp_oge_param_0];
; CHECK-DAG: ld.param.b32 [[B:%hh[0-9]+]], [test_fcmp_oge_param_1];
; CHECK-F16: setp.ge.f16x2 [[P0:%p[0-9]+]]|[[P1:%p[0-9]+]], [[A]], [[B]]
; CHECK-NOF16-DAG: mov.b32 {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[A]]
; CHECK-NOF16-DAG: mov.b32 {[[B0:%h[0-9]+]], [[B1:%h[0-9]+]]}, [[B]]
; CHECK-NOF16-DAG: cvt.f32.f16 [[FA0:%f[0-9]+]], [[A0]]
; CHECK-NOF16-DAG: cvt.f32.f16 [[FB0:%f[0-9]+]], [[B0]]
; CHECK-NOF16-DAG: cvt.f32.f16 [[FA1:%f[0-9]+]], [[A1]]
; CHECK-NOF16-DAG: cvt.f32.f16 [[FB1:%f[0-9]+]], [[B1]]
; CHECK-NOF16-DAG: setp.ge.f32 [[P0:%p[0-9]+]], [[FA0]], [[FB0]]
; CHECK-NOF16-DAG: setp.ge.f32 [[P1:%p[0-9]+]], [[FA1]], [[FB1]]
; CHECK-DAG: selp.u16 [[R0:%rs[0-9]+]], -1, 0, [[P0]];
; CHECK-DAG: selp.u16 [[R1:%rs[0-9]+]], -1, 0, [[P1]];
; CHECK-NEXT: st.param.v2.b8 [func_retval0+0], {[[R0]], [[R1]]};
; CHECK-NEXT: ret;
define <2 x i1> @test_fcmp_oge(<2 x half> %a, <2 x half> %b) #0 {
  %r = fcmp oge <2 x half> %a, %b
  ret <2 x i1> %r
}

; fcmp olt on <2 x half>: setp.lt.f16x2 with f16 math, per-lane setp.lt.f32
; without.
; CHECK-LABEL: test_fcmp_olt(
; CHECK-DAG: ld.param.b32 [[A:%hh[0-9]+]], [test_fcmp_olt_param_0];
; CHECK-DAG: ld.param.b32 [[B:%hh[0-9]+]], [test_fcmp_olt_param_1];
; CHECK-F16: setp.lt.f16x2 [[P0:%p[0-9]+]]|[[P1:%p[0-9]+]], [[A]], [[B]]
; CHECK-NOF16-DAG: mov.b32 {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[A]]
; CHECK-NOF16-DAG: mov.b32 {[[B0:%h[0-9]+]], [[B1:%h[0-9]+]]}, [[B]]
; CHECK-NOF16-DAG: cvt.f32.f16 [[FA0:%f[0-9]+]], [[A0]]
; CHECK-NOF16-DAG: cvt.f32.f16 [[FB0:%f[0-9]+]], [[B0]]
; CHECK-NOF16-DAG: cvt.f32.f16 [[FA1:%f[0-9]+]], [[A1]]
; CHECK-NOF16-DAG: cvt.f32.f16 [[FB1:%f[0-9]+]], [[B1]]
; CHECK-NOF16-DAG: setp.lt.f32 [[P0:%p[0-9]+]], [[FA0]], [[FB0]]
; CHECK-NOF16-DAG: setp.lt.f32 [[P1:%p[0-9]+]], [[FA1]], [[FB1]]
; CHECK-DAG: selp.u16 [[R0:%rs[0-9]+]], -1, 0, [[P0]];
; CHECK-DAG: selp.u16 [[R1:%rs[0-9]+]], -1, 0, [[P1]];
; CHECK-NEXT: st.param.v2.b8 [func_retval0+0], {[[R0]], [[R1]]};
; CHECK-NEXT: ret;
define <2 x i1> @test_fcmp_olt(<2 x half> %a, <2 x half> %b) #0 {
  %r = fcmp olt <2 x half> %a, %b
  ret <2 x i1> %r
}

; fcmp ole on <2 x half>: setp.le.f16x2 with f16 math, per-lane setp.le.f32
; without.
; CHECK-LABEL: test_fcmp_ole(
; CHECK-DAG: ld.param.b32 [[A:%hh[0-9]+]], [test_fcmp_ole_param_0];
; CHECK-DAG: ld.param.b32 [[B:%hh[0-9]+]], [test_fcmp_ole_param_1];
; CHECK-F16: setp.le.f16x2 [[P0:%p[0-9]+]]|[[P1:%p[0-9]+]], [[A]], [[B]]
; CHECK-NOF16-DAG: mov.b32 {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[A]]
; CHECK-NOF16-DAG: mov.b32 {[[B0:%h[0-9]+]], [[B1:%h[0-9]+]]}, [[B]]
; CHECK-NOF16-DAG: cvt.f32.f16 [[FA0:%f[0-9]+]], [[A0]]
; CHECK-NOF16-DAG: cvt.f32.f16 [[FB0:%f[0-9]+]], [[B0]]
; CHECK-NOF16-DAG: cvt.f32.f16 [[FA1:%f[0-9]+]], [[A1]]
; CHECK-NOF16-DAG: cvt.f32.f16 [[FB1:%f[0-9]+]], [[B1]]
; CHECK-NOF16-DAG: setp.le.f32 [[P0:%p[0-9]+]], [[FA0]], [[FB0]]
; CHECK-NOF16-DAG: setp.le.f32 [[P1:%p[0-9]+]], [[FA1]], [[FB1]]
; CHECK-DAG: selp.u16 [[R0:%rs[0-9]+]], -1, 0, [[P0]];
; CHECK-DAG: selp.u16 [[R1:%rs[0-9]+]], -1, 0, [[P1]];
; CHECK-NEXT: st.param.v2.b8 [func_retval0+0], {[[R0]], [[R1]]};
; CHECK-NEXT: ret;
define <2 x i1> @test_fcmp_ole(<2 x half> %a, <2 x half> %b) #0 {
  %r = fcmp ole <2 x half> %a, %b
  ret <2 x i1> %r
}

; fcmp ord on <2 x half>: setp.num.f16x2 with f16 math, per-lane setp.num.f32
; without.
; CHECK-LABEL: test_fcmp_ord(
; CHECK-DAG: ld.param.b32 [[A:%hh[0-9]+]], [test_fcmp_ord_param_0];
; CHECK-DAG: ld.param.b32 [[B:%hh[0-9]+]], [test_fcmp_ord_param_1];
; CHECK-F16: setp.num.f16x2 [[P0:%p[0-9]+]]|[[P1:%p[0-9]+]], [[A]], [[B]]
; CHECK-NOF16-DAG: mov.b32 {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[A]]
; CHECK-NOF16-DAG: mov.b32 {[[B0:%h[0-9]+]], [[B1:%h[0-9]+]]}, [[B]]
; CHECK-NOF16-DAG: cvt.f32.f16 [[FA0:%f[0-9]+]], [[A0]]
; CHECK-NOF16-DAG: cvt.f32.f16 [[FB0:%f[0-9]+]], [[B0]]
; CHECK-NOF16-DAG: cvt.f32.f16 [[FA1:%f[0-9]+]], [[A1]]
; CHECK-NOF16-DAG: cvt.f32.f16 [[FB1:%f[0-9]+]], [[B1]]
; CHECK-NOF16-DAG: setp.num.f32 [[P0:%p[0-9]+]], [[FA0]], [[FB0]]
; CHECK-NOF16-DAG: setp.num.f32 [[P1:%p[0-9]+]], [[FA1]], [[FB1]]
; CHECK-DAG: selp.u16 [[R0:%rs[0-9]+]], -1, 0, [[P0]];
; CHECK-DAG: selp.u16 [[R1:%rs[0-9]+]], -1, 0, [[P1]];
; CHECK-NEXT: st.param.v2.b8 [func_retval0+0], {[[R0]], [[R1]]};
; CHECK-NEXT: ret;
define <2 x i1> @test_fcmp_ord(<2 x half> %a, <2 x half> %b) #0 {
  %r = fcmp ord <2 x half> %a, %b
  ret <2 x i1> %r
}

; fptosi to <2 x i32>: the packed input is split and each half is converted
; with cvt.rzi.s32.f16.
; CHECK-LABEL: test_fptosi_i32(
; CHECK: ld.param.b32 [[A:%hh[0-9]+]], [test_fptosi_i32_param_0];
; CHECK: mov.b32 {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[A]]
; CHECK-DAG: cvt.rzi.s32.f16 [[R0:%r[0-9]+]], [[A0]];
; CHECK-DAG: cvt.rzi.s32.f16 [[R1:%r[0-9]+]], [[A1]];
; CHECK: st.param.v2.b32 [func_retval0+0], {[[R0]], [[R1]]}
; CHECK: ret;
define <2 x i32> @test_fptosi_i32(<2 x half> %a) #0 {
  %r = fptosi <2 x half> %a to <2 x i32>
  ret <2 x i32> %r
}

; fptosi to <2 x i64>: per-lane cvt.rzi.s64.f16 into 64-bit registers.
; CHECK-LABEL: test_fptosi_i64(
; CHECK: ld.param.b32 [[A:%hh[0-9]+]], [test_fptosi_i64_param_0];
; CHECK: mov.b32 {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[A]]
; CHECK-DAG: cvt.rzi.s64.f16 [[R0:%rd[0-9]+]], [[A0]];
; CHECK-DAG: cvt.rzi.s64.f16 [[R1:%rd[0-9]+]], [[A1]];
; CHECK: st.param.v2.b64 [func_retval0+0], {[[R0]], [[R1]]}
; CHECK: ret;
define <2 x i64> @test_fptosi_i64(<2 x half> %a) #0 {
  %r = fptosi <2 x half> %a to <2 x i64>
  ret <2 x i64> %r
}

; fptoui to <2 x i32>: per-lane cvt.rzi.u32.f16.
; CHECK-LABEL: test_fptoui_2xi32(
; CHECK: ld.param.b32 [[A:%hh[0-9]+]], [test_fptoui_2xi32_param_0];
; CHECK: mov.b32 {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[A]]
; CHECK-DAG: cvt.rzi.u32.f16 [[R0:%r[0-9]+]], [[A0]];
; CHECK-DAG: cvt.rzi.u32.f16 [[R1:%r[0-9]+]], [[A1]];
; CHECK: st.param.v2.b32 [func_retval0+0], {[[R0]], [[R1]]}
; CHECK: ret;
define <2 x i32> @test_fptoui_2xi32(<2 x half> %a) #0 {
  %r = fptoui <2 x half> %a to <2 x i32>
  ret <2 x i32> %r
}

; fptoui to <2 x i64>: per-lane cvt.rzi.u64.f16.
; CHECK-LABEL: test_fptoui_2xi64(
; CHECK: ld.param.b32 [[A:%hh[0-9]+]], [test_fptoui_2xi64_param_0];
; CHECK: mov.b32 {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[A]]
; CHECK-DAG: cvt.rzi.u64.f16 [[R0:%rd[0-9]+]], [[A0]];
; CHECK-DAG: cvt.rzi.u64.f16 [[R1:%rd[0-9]+]], [[A1]];
; CHECK: st.param.v2.b64 [func_retval0+0], {[[R0]], [[R1]]}
; CHECK: ret;
define <2 x i64> @test_fptoui_2xi64(<2 x half> %a) #0 {
  %r = fptoui <2 x half> %a to <2 x i64>
  ret <2 x i64> %r
}

; uitofp from <2 x i32>: each lane is converted with cvt.rn.f16.u32 and the two
; halves are packed into one 32-bit register.
; CHECK-LABEL: test_uitofp_2xi32(
; CHECK: ld.param.v2.u32 {[[A0:%r[0-9]+]], [[A1:%r[0-9]+]]}, [test_uitofp_2xi32_param_0];
; CHECK-DAG: cvt.rn.f16.u32 [[R0:%h[0-9]+]], [[A0]];
; CHECK-DAG: cvt.rn.f16.u32 [[R1:%h[0-9]+]], [[A1]];
; CHECK: mov.b32 [[R:%hh[0-9]+]], {[[R0]], [[R1]]}
; CHECK: st.param.b32 [func_retval0+0], [[R]];
; CHECK: ret;
define <2 x half> @test_uitofp_2xi32(<2 x i32> %a) #0 {
  %r = uitofp <2 x i32> %a to <2 x half>
  ret <2 x half> %r
}

; uitofp from <2 x i64>: each lane goes through f32 (cvt.rn.f32.u64, then
; cvt.rn.f16.f32) before being packed.
; CHECK-LABEL: test_uitofp_2xi64(
; CHECK: ld.param.v2.u64 {[[A0:%rd[0-9]+]], [[A1:%rd[0-9]+]]}, [test_uitofp_2xi64_param_0];
; CHECK-DAG: cvt.rn.f32.u64 [[F0:%f[0-9]+]], [[A0]];
; CHECK-DAG: cvt.rn.f32.u64 [[F1:%f[0-9]+]], [[A1]];
; CHECK-DAG: cvt.rn.f16.f32 [[R0:%h[0-9]+]], [[F0]];
; CHECK-DAG: cvt.rn.f16.f32 [[R1:%h[0-9]+]], [[F1]];
; CHECK: mov.b32 [[R:%hh[0-9]+]], {[[R0]], [[R1]]}
; CHECK: st.param.b32 [func_retval0+0], [[R]];
; CHECK: ret;
define <2 x half> @test_uitofp_2xi64(<2 x i64> %a) #0 {
  %r = uitofp <2 x i64> %a to <2 x half>
  ret <2 x half> %r
}

; sitofp from <2 x i32>: per-lane cvt.rn.f16.s32, then pack.
; CHECK-LABEL: test_sitofp_2xi32(
; CHECK: ld.param.v2.u32 {[[A0:%r[0-9]+]], [[A1:%r[0-9]+]]}, [test_sitofp_2xi32_param_0];
; CHECK-DAG: cvt.rn.f16.s32 [[R0:%h[0-9]+]], [[A0]];
; CHECK-DAG: cvt.rn.f16.s32 [[R1:%h[0-9]+]], [[A1]];
; CHECK: mov.b32 [[R:%hh[0-9]+]], {[[R0]], [[R1]]}
; CHECK: st.param.b32 [func_retval0+0], [[R]];
; CHECK: ret;
define <2 x half> @test_sitofp_2xi32(<2 x i32> %a) #0 {
  %r = sitofp <2 x i32> %a to <2 x half>
  ret <2 x half> %r
}

; sitofp from <2 x i64>: each lane goes through f32 (cvt.rn.f32.s64, then
; cvt.rn.f16.f32) before being packed.
; CHECK-LABEL: test_sitofp_2xi64(
; CHECK: ld.param.v2.u64 {[[A0:%rd[0-9]+]], [[A1:%rd[0-9]+]]}, [test_sitofp_2xi64_param_0];
; CHECK-DAG: cvt.rn.f32.s64 [[F0:%f[0-9]+]], [[A0]];
; CHECK-DAG: cvt.rn.f32.s64 [[F1:%f[0-9]+]], [[A1]];
; CHECK-DAG: cvt.rn.f16.f32 [[R0:%h[0-9]+]], [[F0]];
; CHECK-DAG: cvt.rn.f16.f32 [[R1:%h[0-9]+]], [[F1]];
; CHECK: mov.b32 [[R:%hh[0-9]+]], {[[R0]], [[R1]]}
; CHECK: st.param.b32 [func_retval0+0], [[R]];
; CHECK: ret;
define <2 x half> @test_sitofp_2xi64(<2 x i64> %a) #0 {
  %r = sitofp <2 x i64> %a to <2 x half>
  ret <2 x half> %r
}

; uitofp feeding an fadd. With f16 math the converted lanes are packed and
; added with add.rn.f16x2; without it both operands are widened and added per
; lane in f32.
; CHECK-LABEL: test_uitofp_2xi32_fadd(
; CHECK-DAG: ld.param.v2.u32 {[[A0:%r[0-9]+]], [[A1:%r[0-9]+]]}, [test_uitofp_2xi32_fadd_param_0];
; CHECK-DAG: ld.param.b32 [[B:%hh[0-9]+]], [test_uitofp_2xi32_fadd_param_1];
; CHECK-DAG: cvt.rn.f16.u32 [[C0:%h[0-9]+]], [[A0]];
; CHECK-DAG: cvt.rn.f16.u32 [[C1:%h[0-9]+]], [[A1]];
;
; CHECK-F16-DAG: mov.b32 [[C:%hh[0-9]+]], {[[C0]], [[C1]]}
; CHECK-F16-DAG: add.rn.f16x2 [[R:%hh[0-9]+]], [[B]], [[C]];
;
; CHECK-NOF16-DAG: mov.b32 {[[B0:%h[0-9]+]], [[B1:%h[0-9]+]]}, [[B]]
; CHECK-NOF16-DAG: cvt.f32.f16 [[FB0:%f[0-9]+]], [[B0]]
; CHECK-NOF16-DAG: cvt.f32.f16 [[FB1:%f[0-9]+]], [[B1]]
; CHECK-NOF16-DAG: cvt.f32.f16 [[FC0:%f[0-9]+]], [[C0]]
; CHECK-NOF16-DAG: cvt.f32.f16 [[FC1:%f[0-9]+]], [[C1]]
; CHECK-NOF16-DAG: add.rn.f32 [[FR0:%f[0-9]+]], [[FB0]], [[FC0]];
; CHECK-NOF16-DAG: add.rn.f32 [[FR1:%f[0-9]+]], [[FB1]], [[FC1]];
; CHECK-NOF16-DAG: cvt.rn.f16.f32 [[R0:%h[0-9]+]], [[FR0]]
; CHECK-NOF16-DAG: cvt.rn.f16.f32 [[R1:%h[0-9]+]], [[FR1]]
; CHECK-NOF16: mov.b32 [[R:%hh[0-9]+]], {[[R0]], [[R1]]}
;
; CHECK: st.param.b32 [func_retval0+0], [[R]];
; CHECK: ret;
define <2 x half> @test_uitofp_2xi32_fadd(<2 x i32> %a, <2 x half> %b) #0 {
  %c = uitofp <2 x i32> %a to <2 x half>
  %r = fadd <2 x half> %b, %c
  ret <2 x half> %r
}

; sitofp feeding an fadd; same two lowering shapes as the unsigned variant,
; with cvt.rn.f16.s32 as the conversion.
; CHECK-LABEL: test_sitofp_2xi32_fadd(
; CHECK-DAG: ld.param.v2.u32 {[[A0:%r[0-9]+]], [[A1:%r[0-9]+]]}, [test_sitofp_2xi32_fadd_param_0];
; CHECK-DAG: ld.param.b32 [[B:%hh[0-9]+]], [test_sitofp_2xi32_fadd_param_1];
; CHECK-DAG: cvt.rn.f16.s32 [[C0:%h[0-9]+]], [[A0]];
; CHECK-DAG: cvt.rn.f16.s32 [[C1:%h[0-9]+]], [[A1]];
;
; CHECK-F16-DAG: mov.b32 [[C:%hh[0-9]+]], {[[C0]], [[C1]]}
; CHECK-F16-DAG: add.rn.f16x2 [[R:%hh[0-9]+]], [[B]], [[C]];
;
; CHECK-NOF16-DAG: mov.b32 {[[B0:%h[0-9]+]], [[B1:%h[0-9]+]]}, [[B]]
; CHECK-NOF16-DAG: cvt.f32.f16 [[FB0:%f[0-9]+]], [[B0]]
; CHECK-NOF16-DAG: cvt.f32.f16 [[FB1:%f[0-9]+]], [[B1]]
; CHECK-NOF16-DAG: cvt.f32.f16 [[FC0:%f[0-9]+]], [[C0]]
; CHECK-NOF16-DAG: cvt.f32.f16 [[FC1:%f[0-9]+]], [[C1]]
; CHECK-NOF16-DAG: add.rn.f32 [[FR0:%f[0-9]+]], [[FB0]], [[FC0]];
; CHECK-NOF16-DAG: add.rn.f32 [[FR1:%f[0-9]+]], [[FB1]], [[FC1]];
; CHECK-NOF16-DAG: cvt.rn.f16.f32 [[R0:%h[0-9]+]], [[FR0]]
; CHECK-NOF16-DAG: cvt.rn.f16.f32 [[R1:%h[0-9]+]], [[FR1]]
; CHECK-NOF16: mov.b32 [[R:%hh[0-9]+]], {[[R0]], [[R1]]}
;
; CHECK: st.param.b32 [func_retval0+0], [[R]];
; CHECK: ret;
define <2 x half> @test_sitofp_2xi32_fadd(<2 x i32> %a, <2 x half> %b) #0 {
  %c = sitofp <2 x i32> %a to <2 x half>
  %r = fadd <2 x half> %b, %c
  ret <2 x half> %r
}

; fptrunc from <2 x float>: per-lane cvt.rn.f16.f32, then pack.
; CHECK-LABEL: test_fptrunc_2xfloat(
; CHECK: ld.param.v2.f32 {[[A0:%f[0-9]+]], [[A1:%f[0-9]+]]}, [test_fptrunc_2xfloat_param_0];
; CHECK-DAG: cvt.rn.f16.f32 [[R0:%h[0-9]+]], [[A0]];
; CHECK-DAG: cvt.rn.f16.f32 [[R1:%h[0-9]+]], [[A1]];
; CHECK: mov.b32 [[R:%hh[0-9]+]], {[[R0]], [[R1]]}
; CHECK: st.param.b32 [func_retval0+0], [[R]];
; CHECK: ret;
define <2 x half> @test_fptrunc_2xfloat(<2 x float> %a) #0 {
  %r = fptrunc <2 x float> %a to <2 x half>
  ret <2 x half> %r
}

; fptrunc from <2 x double>: per-lane cvt.rn.f16.f64, then pack.
; CHECK-LABEL: test_fptrunc_2xdouble(
; CHECK: ld.param.v2.f64 {[[A0:%fd[0-9]+]], [[A1:%fd[0-9]+]]}, [test_fptrunc_2xdouble_param_0];
; CHECK-DAG: cvt.rn.f16.f64 [[R0:%h[0-9]+]], [[A0]];
; CHECK-DAG: cvt.rn.f16.f64 [[R1:%h[0-9]+]], [[A1]];
; CHECK: mov.b32 [[R:%hh[0-9]+]], {[[R0]], [[R1]]}
; CHECK: st.param.b32 [func_retval0+0], [[R]];
; CHECK: ret;
define <2 x half> @test_fptrunc_2xdouble(<2 x double> %a) #0 {
  %r = fptrunc <2 x double> %a to <2 x half>
  ret <2 x half> %r
}

; fpext to <2 x float>: split the packed input and widen each half with
; cvt.f32.f16.
; CHECK-LABEL: test_fpext_2xfloat(
; CHECK: ld.param.b32 [[A:%hh[0-9]+]], [test_fpext_2xfloat_param_0];
; CHECK: mov.b32 {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[A]]
; CHECK-DAG: cvt.f32.f16 [[R0:%f[0-9]+]], [[A0]];
; CHECK-DAG: cvt.f32.f16 [[R1:%f[0-9]+]], [[A1]];
; CHECK-NEXT: st.param.v2.f32 [func_retval0+0], {[[R0]], [[R1]]};
; CHECK: ret;
define <2 x float> @test_fpext_2xfloat(<2 x half> %a) #0 {
  %r = fpext <2 x half> %a to <2 x float>
  ret <2 x float> %r
}

; fpext to <2 x double>: split the packed input and widen each half with
; cvt.f64.f16.
; CHECK-LABEL: test_fpext_2xdouble(
; CHECK: ld.param.b32 [[A:%hh[0-9]+]], [test_fpext_2xdouble_param_0];
; CHECK: mov.b32 {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[A]]
; CHECK-DAG: cvt.f64.f16 [[R0:%fd[0-9]+]], [[A0]];
; CHECK-DAG: cvt.f64.f16 [[R1:%fd[0-9]+]], [[A1]];
; CHECK-NEXT: st.param.v2.f64 [func_retval0+0], {[[R0]], [[R1]]};
; CHECK: ret;
define <2 x double> @test_fpext_2xdouble(<2 x half> %a) #0 {
  %r = fpext <2 x half> %a to <2 x double>
  ret <2 x double> %r
}


; bitcast <2 x half> -> <2 x i16>: the argument is loaded as a 32-bit integer
; and the two 16-bit lanes are extracted with a truncate and a shift.
; CHECK-LABEL: test_bitcast_2xhalf_to_2xi16(
; CHECK: ld.param.u32 [[A:%r[0-9]+]], [test_bitcast_2xhalf_to_2xi16_param_0];
; CHECK-DAG: cvt.u16.u32 [[R0:%rs[0-9]+]], [[A]]
; CHECK-DAG: shr.u32 [[AH:%r[0-9]+]], [[A]], 16
; CHECK-DAG: cvt.u16.u32 [[R1:%rs[0-9]+]], [[AH]]
; CHECK: st.param.v2.b16 [func_retval0+0], {[[R0]], [[R1]]}
; CHECK: ret;
define <2 x i16> @test_bitcast_2xhalf_to_2xi16(<2 x half> %a) #0 {
  %r = bitcast <2 x half> %a to <2 x i16>
  ret <2 x i16> %r
}

; bitcast <2 x i16> -> <2 x half>: the two 16-bit lanes are zero-extended,
; merged with shl/or, and the result is moved into a packed-half register.
; CHECK-LABEL: test_bitcast_2xi16_to_2xhalf(
; CHECK: ld.param.v2.u16 {[[RS0:%rs[0-9]+]], [[RS1:%rs[0-9]+]]}, [test_bitcast_2xi16_to_2xhalf_param_0];
; CHECK-DAG: cvt.u32.u16 [[R0:%r[0-9]+]], [[RS0]];
; CHECK-DAG: cvt.u32.u16 [[R1:%r[0-9]+]], [[RS1]];
; CHECK-DAG: shl.b32 [[R1H:%r[0-9]+]], [[R1]], 16;
; CHECK-DAG: or.b32 [[R1H0L:%r[0-9]+]], [[R0]], [[R1H]];
; CHECK: mov.b32 [[R:%hh[0-9]+]], [[R1H0L]];
; CHECK: st.param.b32 [func_retval0+0], [[R]];
; CHECK: ret;
define <2 x half> @test_bitcast_2xi16_to_2xhalf(<2 x i16> %a) #0 {
  %r = bitcast <2 x i16> %a to <2 x half>
  ret <2 x half> %r
}


; Declarations of the intrinsics called by the tests that follow.
declare <2 x half> @llvm.sqrt.f16(<2 x half> %a) #0
declare <2 x half> @llvm.powi.f16(<2 x half> %a, <2 x i32> %b) #0
declare <2 x half> @llvm.sin.f16(<2 x half> %a) #0
declare <2 x half> @llvm.cos.f16(<2 x half> %a) #0
declare <2 x half> @llvm.pow.f16(<2 x half> %a, <2 x half> %b) #0
declare <2 x half> @llvm.exp.f16(<2 x half> %a) #0
declare <2 x half> @llvm.exp2.f16(<2 x half> %a) #0
declare <2 x half> @llvm.log.f16(<2 x half> %a) #0
declare <2 x half> @llvm.log10.f16(<2 x half> %a) #0
declare <2 x half> @llvm.log2.f16(<2 x half> %a) #0
declare <2 x half> @llvm.fma.f16(<2 x half> %a, <2 x half> %b, <2 x half> %c) #0
declare <2 x half> @llvm.fabs.f16(<2 x half> %a) #0
declare <2 x half> @llvm.minnum.f16(<2 x half> %a, <2 x half> %b) #0
declare <2 x half>
@llvm.maxnum.f16(<2 x half> %a, <2 x half> %b) #0
declare <2 x half> @llvm.copysign.f16(<2 x half> %a, <2 x half> %b) #0
declare <2 x half> @llvm.floor.f16(<2 x half> %a) #0
declare <2 x half> @llvm.ceil.f16(<2 x half> %a) #0
declare <2 x half> @llvm.trunc.f16(<2 x half> %a) #0
declare <2 x half> @llvm.rint.f16(<2 x half> %a) #0
declare <2 x half> @llvm.nearbyint.f16(<2 x half> %a) #0
declare <2 x half> @llvm.round.f16(<2 x half> %a) #0
declare <2 x half> @llvm.fmuladd.f16(<2 x half> %a, <2 x half> %b, <2 x half> %c) #0

; llvm.sqrt on <2 x half>: the same sequence is expected in every
; configuration. Each half is widened to f32, sqrt.rn.f32 is applied, and the
; results are narrowed back and repacked.
; CHECK-LABEL: test_sqrt(
; CHECK: ld.param.b32 [[A:%hh[0-9]+]], [test_sqrt_param_0];
; CHECK: mov.b32 {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[A]]
; CHECK-DAG: cvt.f32.f16 [[AF0:%f[0-9]+]], [[A0]];
; CHECK-DAG: cvt.f32.f16 [[AF1:%f[0-9]+]], [[A1]];
; CHECK-DAG: sqrt.rn.f32 [[RF0:%f[0-9]+]], [[AF0]];
; CHECK-DAG: sqrt.rn.f32 [[RF1:%f[0-9]+]], [[AF1]];
; CHECK-DAG: cvt.rn.f16.f32 [[R0:%h[0-9]+]], [[RF0]];
; CHECK-DAG: cvt.rn.f16.f32 [[R1:%h[0-9]+]], [[RF1]];
; CHECK: mov.b32 [[R:%hh[0-9]+]], {[[R0]], [[R1]]}
; CHECK: st.param.b32 [func_retval0+0], [[R]];
; CHECK: ret;
define <2 x half> @test_sqrt(<2 x half> %a) #0 {
  %r = call <2 x half> @llvm.sqrt.f16(<2 x half> %a)
  ret <2 x half> %r
}

;;; Can't do this yet: requires libcall.
; XCHECK-LABEL: test_powi(
;define <2 x half> @test_powi(<2 x half> %a, <2 x i32> %b) #0 {
;  %r = call <2 x half> @llvm.powi.f16(<2 x half> %a, <2 x i32> %b)
;  ret <2 x half> %r
;}

; llvm.sin on <2 x half>: each half is widened to f32, sin.approx.f32 is
; applied, and the results are narrowed back and repacked. The function carries
; attribute group #1 in addition to #0.
; CHECK-LABEL: test_sin(
; CHECK: ld.param.b32 [[A:%hh[0-9]+]], [test_sin_param_0];
; CHECK: mov.b32 {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[A]]
; CHECK-DAG: cvt.f32.f16 [[AF0:%f[0-9]+]], [[A0]];
; CHECK-DAG: cvt.f32.f16 [[AF1:%f[0-9]+]], [[A1]];
; CHECK-DAG: sin.approx.f32 [[RF0:%f[0-9]+]], [[AF0]];
; CHECK-DAG: sin.approx.f32 [[RF1:%f[0-9]+]], [[AF1]];
; CHECK-DAG: cvt.rn.f16.f32 [[R0:%h[0-9]+]], [[RF0]];
; CHECK-DAG: cvt.rn.f16.f32 [[R1:%h[0-9]+]], [[RF1]];
; CHECK: mov.b32 [[R:%hh[0-9]+]], {[[R0]], [[R1]]}
; CHECK: st.param.b32 [func_retval0+0], [[R]];
; CHECK: ret;
define <2 x half> @test_sin(<2 x half> %a) #0 #1 {
  %r = call <2 x half> @llvm.sin.f16(<2 x half> %a)
  ret <2 x half> %r
}

; llvm.cos on <2 x half>: same shape as the sine test, using cos.approx.f32.
; CHECK-LABEL: test_cos(
; CHECK: ld.param.b32 [[A:%hh[0-9]+]], [test_cos_param_0];
; CHECK: mov.b32 {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[A]]
; CHECK-DAG: cvt.f32.f16 [[AF0:%f[0-9]+]], [[A0]];
; CHECK-DAG: cvt.f32.f16 [[AF1:%f[0-9]+]], [[A1]];
; CHECK-DAG: cos.approx.f32 [[RF0:%f[0-9]+]], [[AF0]];
; CHECK-DAG: cos.approx.f32 [[RF1:%f[0-9]+]], [[AF1]];
; CHECK-DAG: cvt.rn.f16.f32 [[R0:%h[0-9]+]], [[RF0]];
; CHECK-DAG: cvt.rn.f16.f32 [[R1:%h[0-9]+]], [[RF1]];
; CHECK: mov.b32 [[R:%hh[0-9]+]], {[[R0]], [[R1]]}
; CHECK: st.param.b32 [func_retval0+0], [[R]];
; CHECK: ret;
define <2 x half> @test_cos(<2 x half> %a) #0 #1 {
  %r = call <2 x half> @llvm.cos.f16(<2 x half> %a)
  ret <2 x half> %r
}

;;; Can't do this yet: requires libcall.
; XCHECK-LABEL: test_pow(
;define <2 x half> @test_pow(<2 x half> %a, <2 x half> %b) #0 {
;  %r = call <2 x half> @llvm.pow.f16(<2 x half> %a, <2 x half> %b)
;  ret <2 x half> %r
;}

;;; Can't do this yet: requires libcall.
; XCHECK-LABEL: test_exp(
;define <2 x half> @test_exp(<2 x half> %a) #0 {
;  %r = call <2 x half> @llvm.exp.f16(<2 x half> %a)
;  ret <2 x half> %r
;}

;;; Can't do this yet: requires libcall.
; XCHECK-LABEL: test_exp2(
;define <2 x half> @test_exp2(<2 x half> %a) #0 {
;  %r = call <2 x half> @llvm.exp2.f16(<2 x half> %a)
;  ret <2 x half> %r
;}

;;; Can't do this yet: requires libcall.
; XCHECK-LABEL: test_log(
;define <2 x half> @test_log(<2 x half> %a) #0 {
;  %r = call <2 x half> @llvm.log.f16(<2 x half> %a)
;  ret <2 x half> %r
;}

;;; Can't do this yet: requires libcall.
; XCHECK-LABEL: test_log10(
;define <2 x half> @test_log10(<2 x half> %a) #0 {
;  %r = call <2 x half> @llvm.log10.f16(<2 x half> %a)
;  ret <2 x half> %r
;}

;;; Can't do this yet: requires libcall.
; XCHECK-LABEL: test_log2(
;define <2 x half> @test_log2(<2 x half> %a) #0 {
;  %r = call <2 x half> @llvm.log2.f16(<2 x half> %a)
;  ret <2 x half> %r
;}

; llvm.fma on <2 x half>. With f16 math a single fma.rn.f16x2 is expected;
; without it all three operands are split, widened to f32, combined per lane
; with fma.rn.f32, then narrowed and repacked. Every lane-1 capture (FA1, FB1,
; FC1) is defined inside this test so the lane-1 fma check cannot bind to a
; register captured by an earlier function.
; CHECK-LABEL: test_fma(
; CHECK-DAG: ld.param.b32 [[A:%hh[0-9]+]], [test_fma_param_0];
; CHECK-DAG: ld.param.b32 [[B:%hh[0-9]+]], [test_fma_param_1];
; CHECK-DAG: ld.param.b32 [[C:%hh[0-9]+]], [test_fma_param_2];
;
; CHECK-F16: fma.rn.f16x2 [[R:%hh[0-9]+]], [[A]], [[B]], [[C]];
;
; CHECK-NOF16-DAG: mov.b32 {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[A]]
; CHECK-NOF16-DAG: mov.b32 {[[B0:%h[0-9]+]], [[B1:%h[0-9]+]]}, [[B]]
; CHECK-NOF16-DAG: mov.b32 {[[C0:%h[0-9]+]], [[C1:%h[0-9]+]]}, [[C]]
; CHECK-NOF16-DAG: cvt.f32.f16 [[FA0:%f[0-9]+]], [[A0]]
; CHECK-NOF16-DAG: cvt.f32.f16 [[FB0:%f[0-9]+]], [[B0]]
; CHECK-NOF16-DAG: cvt.f32.f16 [[FC0:%f[0-9]+]], [[C0]]
; CHECK-NOF16-DAG: cvt.f32.f16 [[FA1:%f[0-9]+]], [[A1]]
; CHECK-NOF16-DAG: cvt.f32.f16 [[FB1:%f[0-9]+]], [[B1]]
; CHECK-NOF16-DAG: cvt.f32.f16 [[FC1:%f[0-9]+]], [[C1]]
; CHECK-NOF16-DAG: fma.rn.f32 [[FR0:%f[0-9]+]], [[FA0]], [[FB0]], [[FC0]];
; CHECK-NOF16-DAG: fma.rn.f32 [[FR1:%f[0-9]+]], [[FA1]], [[FB1]], [[FC1]];
; CHECK-NOF16-DAG: cvt.rn.f16.f32 [[R0:%h[0-9]+]], [[FR0]]
; CHECK-NOF16-DAG: cvt.rn.f16.f32 [[R1:%h[0-9]+]], [[FR1]]
; CHECK-NOF16: mov.b32 [[R:%hh[0-9]+]], {[[R0]], [[R1]]}
;
; CHECK: st.param.b32 [func_retval0+0], [[R]];
; CHECK: ret;
define <2 x half> @test_fma(<2 x half> %a, <2 x half> %b, <2 x half> %c) #0 {
  %r = call <2 x half> @llvm.fma.f16(<2 x half> %a, <2 x half> %b, <2 x half> %c)
  ret <2 x half> %r
}

; llvm.fabs on <2 x half>: each half is widened to f32, abs.f32 is applied, and
; the results are narrowed back and repacked.
; CHECK-LABEL: test_fabs(
; CHECK: ld.param.b32 [[A:%hh[0-9]+]], [test_fabs_param_0];
; CHECK: mov.b32 {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[A]]
; CHECK-DAG: cvt.f32.f16 [[AF0:%f[0-9]+]], [[A0]];
; CHECK-DAG: cvt.f32.f16 [[AF1:%f[0-9]+]], [[A1]];
; CHECK-DAG: abs.f32 [[RF0:%f[0-9]+]], [[AF0]];
; CHECK-DAG: abs.f32 [[RF1:%f[0-9]+]], [[AF1]];
; CHECK-DAG: cvt.rn.f16.f32 [[R0:%h[0-9]+]], [[RF0]];
; CHECK-DAG: cvt.rn.f16.f32 [[R1:%h[0-9]+]], [[RF1]];
; CHECK: mov.b32 [[R:%hh[0-9]+]], {[[R0]], [[R1]]}
; CHECK: st.param.b32 [func_retval0+0], [[R]];
; CHECK: ret;
define <2 x half> @test_fabs(<2 x half> %a) #0 {
  %r = call <2 x half> @llvm.fabs.f16(<2 x half> %a)
  ret <2 x half> %r
}

; llvm.minnum on <2 x half>: both operands are split and widened, min.f32 is
; applied per lane, and the results are narrowed back and repacked.
; CHECK-LABEL: test_minnum(
; CHECK-DAG: ld.param.b32 [[A:%hh[0-9]+]], [test_minnum_param_0];
; CHECK-DAG: ld.param.b32 [[B:%hh[0-9]+]], [test_minnum_param_1];
; CHECK-DAG: mov.b32 {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[A]]
; CHECK-DAG: mov.b32 {[[B0:%h[0-9]+]], [[B1:%h[0-9]+]]}, [[B]]
; CHECK-DAG: cvt.f32.f16 [[AF0:%f[0-9]+]], [[A0]];
; CHECK-DAG: cvt.f32.f16 [[AF1:%f[0-9]+]], [[A1]];
; CHECK-DAG: cvt.f32.f16 [[BF0:%f[0-9]+]], [[B0]];
; CHECK-DAG: cvt.f32.f16 [[BF1:%f[0-9]+]], [[B1]];
; CHECK-DAG: min.f32 [[RF0:%f[0-9]+]], [[AF0]], [[BF0]];
; CHECK-DAG: min.f32 [[RF1:%f[0-9]+]], [[AF1]], [[BF1]];
; CHECK-DAG: cvt.rn.f16.f32 [[R0:%h[0-9]+]], [[RF0]];
; CHECK-DAG: cvt.rn.f16.f32 [[R1:%h[0-9]+]], [[RF1]];
; CHECK: mov.b32 [[R:%hh[0-9]+]], {[[R0]], [[R1]]}
; CHECK: st.param.b32 [func_retval0+0], [[R]];
; CHECK: ret;
define <2 x half> @test_minnum(<2 x half> %a, <2 x half> %b) #0 {
  %r = call <2 x half> @llvm.minnum.f16(<2 x half> %a, <2 x half> %b)
  ret <2 x half> %r
}

; llvm.maxnum on <2 x half>: both operands are split and widened, max.f32 is
; applied per lane, and the results are narrowed back and repacked.
; CHECK-LABEL: test_maxnum(
; CHECK-DAG: ld.param.b32 [[A:%hh[0-9]+]], [test_maxnum_param_0];
; CHECK-DAG: ld.param.b32 [[B:%hh[0-9]+]], [test_maxnum_param_1];
; CHECK-DAG: mov.b32 {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[A]]
; CHECK-DAG: mov.b32 {[[B0:%h[0-9]+]], [[B1:%h[0-9]+]]}, [[B]]
; CHECK-DAG: cvt.f32.f16 [[AF0:%f[0-9]+]], [[A0]];
; CHECK-DAG: cvt.f32.f16 [[AF1:%f[0-9]+]], [[A1]];
; CHECK-DAG: cvt.f32.f16 [[BF0:%f[0-9]+]], [[B0]];
; CHECK-DAG: cvt.f32.f16 [[BF1:%f[0-9]+]], [[B1]];
; CHECK-DAG: max.f32 [[RF0:%f[0-9]+]], [[AF0]], [[BF0]];
; CHECK-DAG: max.f32 [[RF1:%f[0-9]+]], [[AF1]], [[BF1]];
; CHECK-DAG: cvt.rn.f16.f32 [[R0:%h[0-9]+]], [[RF0]];
; CHECK-DAG: cvt.rn.f16.f32 [[R1:%h[0-9]+]], [[RF1]];
; CHECK: mov.b32 [[R:%hh[0-9]+]], {[[R0]], [[R1]]}
; CHECK: st.param.b32 [func_retval0+0], [[R]];
; CHECK: ret;
define <2 x half> @test_maxnum(<2 x half> %a, <2 x half> %b) #0 {
  %r = call <2 x half> @llvm.maxnum.f16(<2 x half> %a, <2 x half> %b)
  ret <2 x half> %r
}

; llvm.copysign with both operands <2 x half>: expected as 16-bit integer bit
; manipulation per lane. The magnitude is masked with 32767, the sign source
; with -32768, and the two are or-ed together.
; CHECK-LABEL: test_copysign(
; CHECK-DAG: ld.param.b32 [[A:%hh[0-9]+]], [test_copysign_param_0];
; CHECK-DAG: ld.param.b32 [[B:%hh[0-9]+]], [test_copysign_param_1];
; CHECK-DAG: mov.b32 {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[A]]
; CHECK-DAG: mov.b32 {[[B0:%h[0-9]+]], [[B1:%h[0-9]+]]}, [[B]]
; CHECK-DAG: mov.b16 [[AS0:%rs[0-9]+]], [[A0]];
; CHECK-DAG: mov.b16 [[AS1:%rs[0-9]+]], [[A1]];
; CHECK-DAG: mov.b16 [[BS0:%rs[0-9]+]], [[B0]];
; CHECK-DAG: mov.b16 [[BS1:%rs[0-9]+]], [[B1]];
; CHECK-DAG: and.b16 [[AX0:%rs[0-9]+]], [[AS0]], 32767;
; CHECK-DAG: and.b16 [[AX1:%rs[0-9]+]], [[AS1]], 32767;
; CHECK-DAG: and.b16 [[BX0:%rs[0-9]+]], [[BS0]], -32768;
; CHECK-DAG: and.b16 [[BX1:%rs[0-9]+]], [[BS1]], -32768;
; CHECK-DAG: or.b16 [[RS0:%rs[0-9]+]], [[AX0]], [[BX0]];
; CHECK-DAG: or.b16 [[RS1:%rs[0-9]+]], [[AX1]], [[BX1]];
; CHECK-DAG: mov.b16 [[R0:%h[0-9]+]], [[RS0]];
; CHECK-DAG: mov.b16 [[R1:%h[0-9]+]], [[RS1]];
; CHECK-DAG: mov.b32 [[R:%hh[0-9]+]], {[[R0]], [[R1]]}
; CHECK: st.param.b32 [func_retval0+0], [[R]];
; CHECK: ret;
define <2 x half> @test_copysign(<2 x half> %a, <2 x half> %b) #0 {
  %r = call <2 x half> @llvm.copysign.f16(<2 x half> %a, <2 x half> %b)
  ret <2 x half> %r
}

; llvm.copysign whose sign operand is an fptrunc from <2 x float>: the checks
; expect the sign bit to be taken from the f32 bit pattern (mask with
; -2147483648, shift right by 16, truncate to 16 bits) and or-ed into the
; masked magnitude.
; CHECK-LABEL: test_copysign_f32(
; CHECK-DAG: ld.param.b32 [[A:%hh[0-9]+]], [test_copysign_f32_param_0];
; CHECK-DAG: ld.param.v2.f32 {[[B0:%f[0-9]+]], [[B1:%f[0-9]+]]}, [test_copysign_f32_param_1];
; CHECK-DAG: mov.b32 {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[A]]
; CHECK-DAG: mov.b16 [[AS0:%rs[0-9]+]], [[A0]];
; CHECK-DAG: mov.b16 [[AS1:%rs[0-9]+]], [[A1]];
; CHECK-DAG: mov.b32 [[BI0:%r[0-9]+]], [[B0]];
; CHECK-DAG: mov.b32 [[BI1:%r[0-9]+]], [[B1]];
; CHECK-DAG: and.b16 [[AI0:%rs[0-9]+]], [[AS0]], 32767;
; CHECK-DAG: and.b16 [[AI1:%rs[0-9]+]], [[AS1]], 32767;
; CHECK-DAG: and.b32 [[BX0:%r[0-9]+]], [[BI0]], -2147483648;
; CHECK-DAG: and.b32 [[BX1:%r[0-9]+]], [[BI1]], -2147483648;
; CHECK-DAG: shr.u32 [[BY0:%r[0-9]+]], [[BX0]], 16;
; CHECK-DAG: shr.u32 [[BY1:%r[0-9]+]], [[BX1]], 16;
; CHECK-DAG: cvt.u16.u32 [[BZ0:%rs[0-9]+]], [[BY0]];
; CHECK-DAG: cvt.u16.u32 [[BZ1:%rs[0-9]+]], [[BY1]];
; CHECK-DAG: or.b16 [[RS0:%rs[0-9]+]], [[AI0]], [[BZ0]];
; CHECK-DAG: or.b16 [[RS1:%rs[0-9]+]], [[AI1]], [[BZ1]];
; CHECK-DAG: mov.b16 [[R0:%h[0-9]+]], [[RS0]];
; CHECK-DAG: mov.b16 [[R1:%h[0-9]+]], [[RS1]];
; CHECK-DAG: mov.b32 [[R:%hh[0-9]+]], {[[R0]], [[R1]]}
; CHECK: st.param.b32 [func_retval0+0], [[R]];
; CHECK: ret;
define <2 x half> @test_copysign_f32(<2 x half> %a, <2 x float> %b) #0 {
  %tb = fptrunc <2 x float> %b to <2 x half>
  %r = call <2 x half> @llvm.copysign.f16(<2 x half> %a, <2 x half> %tb)
  ret <2 x half> %r
}

; llvm.copysign whose sign operand is an fptrunc from <2 x double>.
; CHECK-LABEL: test_copysign_f64(
; CHECK-DAG: ld.param.b32 [[A:%hh[0-9]+]], [test_copysign_f64_param_0];
; CHECK-DAG: ld.param.v2.f64 {[[B0:%fd[0-9]+]], [[B1:%fd[0-9]+]]}, [test_copysign_f64_param_1];
; CHECK-DAG: mov.b32 {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[A]]
; CHECK-DAG: mov.b16 [[AS0:%rs[0-9]+]], [[A0]];
; CHECK-DAG: mov.b16 [[AS1:%rs[0-9]+]], [[A1]];
; CHECK-DAG: mov.b64 [[BI0:%rd[0-9]+]], [[B0]];
1268 ; CHECK-DAG: mov.b64 [[BI1:%rd[0-9]+]], [[B1]]; 1269 ; CHECK-DAG: and.b16 [[AI0:%rs[0-9]+]], [[AS0]], 32767; 1270 ; CHECK-DAG: and.b16 [[AI1:%rs[0-9]+]], [[AS1]], 32767; 1271 ; CHECK-DAG: and.b64 [[BX0:%rd[0-9]+]], [[BI0]], -9223372036854775808; 1272 ; CHECK-DAG: and.b64 [[BX1:%rd[0-9]+]], [[BI1]], -9223372036854775808; 1273 ; CHECK-DAG: shr.u64 [[BY0:%rd[0-9]+]], [[BX0]], 48; 1274 ; CHECK-DAG: shr.u64 [[BY1:%rd[0-9]+]], [[BX1]], 48; 1275 ; CHECK-DAG: cvt.u16.u64 [[BZ0:%rs[0-9]+]], [[BY0]]; 1276 ; CHECK-DAG: cvt.u16.u64 [[BZ1:%rs[0-9]+]], [[BY1]]; 1277 ; CHECK-DAG: or.b16 [[RS0:%rs[0-9]+]], [[AI0]], [[BZ0]]; 1278 ; CHECK-DAG: or.b16 [[RS1:%rs[0-9]+]], [[AI1]], [[BZ1]]; 1279 ; CHECK-DAG: mov.b16 [[R0:%h[0-9]+]], [[RS0]]; 1280 ; CHECK-DAG: mov.b16 [[R1:%h[0-9]+]], [[RS1]]; 1281 ; CHECK-DAG: mov.b32 [[R:%hh[0-9]+]], {[[R0]], [[R1]]} 1282 ; CHECK: st.param.b32 [func_retval0+0], [[R]]; 1283 ; CHECK: ret; 1284 define <2 x half> @test_copysign_f64(<2 x half> %a, <2 x double> %b) #0 { 1285 %tb = fptrunc <2 x double> %b to <2 x half> 1286 %r = call <2 x half> @llvm.copysign.f16(<2 x half> %a, <2 x half> %tb) 1287 ret <2 x half> %r 1288 } 1289 1290 ; CHECK-LABEL: test_copysign_extended( 1291 ; CHECK-DAG: ld.param.b32 [[A:%hh[0-9]+]], [test_copysign_extended_param_0]; 1292 ; CHECK-DAG: ld.param.b32 [[B:%hh[0-9]+]], [test_copysign_extended_param_1]; 1293 ; CHECK-DAG: mov.b32 {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[A]] 1294 ; CHECK-DAG: mov.b32 {[[B0:%h[0-9]+]], [[B1:%h[0-9]+]]}, [[B]] 1295 ; CHECK-DAG: mov.b16 [[AS0:%rs[0-9]+]], [[A0]]; 1296 ; CHECK-DAG: mov.b16 [[AS1:%rs[0-9]+]], [[A1]]; 1297 ; CHECK-DAG: mov.b16 [[BS0:%rs[0-9]+]], [[B0]]; 1298 ; CHECK-DAG: mov.b16 [[BS1:%rs[0-9]+]], [[B1]]; 1299 ; CHECK-DAG: and.b16 [[AX0:%rs[0-9]+]], [[AS0]], 32767; 1300 ; CHECK-DAG: and.b16 [[AX1:%rs[0-9]+]], [[AS1]], 32767; 1301 ; CHECK-DAG: and.b16 [[BX0:%rs[0-9]+]], [[BS0]], -32768; 1302 ; CHECK-DAG: and.b16 [[BX1:%rs[0-9]+]], [[BS1]], -32768; 1303 ; CHECK-DAG: or.b16 
[[RS0:%rs[0-9]+]], [[AX0]], [[BX0]]; 1304 ; CHECK-DAG: or.b16 [[RS1:%rs[0-9]+]], [[AX1]], [[BX1]]; 1305 ; CHECK-DAG: mov.b16 [[R0:%h[0-9]+]], [[RS0]]; 1306 ; CHECK-DAG: mov.b16 [[R1:%h[0-9]+]], [[RS1]]; 1307 ; CHECK-DAG: mov.b32 [[R:%hh[0-9]+]], {[[R0]], [[R1]]} 1308 ; CHECK: mov.b32 {[[RX0:%h[0-9]+]], [[RX1:%h[0-9]+]]}, [[R]] 1309 ; CHECK-DAG: cvt.f32.f16 [[XR0:%f[0-9]+]], [[RX0]]; 1310 ; CHECK-DAG: cvt.f32.f16 [[XR1:%f[0-9]+]], [[RX1]]; 1311 ; CHECK: st.param.v2.f32 [func_retval0+0], {[[XR0]], [[XR1]]}; 1312 ; CHECK: ret; 1313 define <2 x float> @test_copysign_extended(<2 x half> %a, <2 x half> %b) #0 { 1314 %r = call <2 x half> @llvm.copysign.f16(<2 x half> %a, <2 x half> %b) 1315 %xr = fpext <2 x half> %r to <2 x float> 1316 ret <2 x float> %xr 1317 } 1318 1319 ; CHECK-LABEL: test_floor( 1320 ; CHECK: ld.param.b32 [[A:%hh[0-9]+]], [test_floor_param_0]; 1321 ; CHECK-DAG: mov.b32 {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[A]]; 1322 ; CHECK-DAG: cvt.rmi.f16.f16 [[R1:%h[0-9]+]], [[A1]]; 1323 ; CHECK-DAG: cvt.rmi.f16.f16 [[R0:%h[0-9]+]], [[A0]]; 1324 ; CHECK: mov.b32 [[R:%hh[0-9]+]], {[[R0]], [[R1]]} 1325 ; CHECK: st.param.b32 [func_retval0+0], [[R]]; 1326 ; CHECK: ret; 1327 define <2 x half> @test_floor(<2 x half> %a) #0 { 1328 %r = call <2 x half> @llvm.floor.f16(<2 x half> %a) 1329 ret <2 x half> %r 1330 } 1331 1332 ; CHECK-LABEL: test_ceil( 1333 ; CHECK: ld.param.b32 [[A:%hh[0-9]+]], [test_ceil_param_0]; 1334 ; CHECK-DAG: mov.b32 {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[A]]; 1335 ; CHECK-DAG: cvt.rpi.f16.f16 [[R1:%h[0-9]+]], [[A1]]; 1336 ; CHECK-DAG: cvt.rpi.f16.f16 [[R0:%h[0-9]+]], [[A0]]; 1337 ; CHECK: mov.b32 [[R:%hh[0-9]+]], {[[R0]], [[R1]]} 1338 ; CHECK: st.param.b32 [func_retval0+0], [[R]]; 1339 ; CHECK: ret; 1340 define <2 x half> @test_ceil(<2 x half> %a) #0 { 1341 %r = call <2 x half> @llvm.ceil.f16(<2 x half> %a) 1342 ret <2 x half> %r 1343 } 1344 1345 ; CHECK-LABEL: test_trunc( 1346 ; CHECK: ld.param.b32 [[A:%hh[0-9]+]], [test_trunc_param_0]; 1347 ; 
CHECK-DAG: mov.b32 {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[A]]; 1348 ; CHECK-DAG: cvt.rzi.f16.f16 [[R1:%h[0-9]+]], [[A1]]; 1349 ; CHECK-DAG: cvt.rzi.f16.f16 [[R0:%h[0-9]+]], [[A0]]; 1350 ; CHECK: mov.b32 [[R:%hh[0-9]+]], {[[R0]], [[R1]]} 1351 ; CHECK: st.param.b32 [func_retval0+0], [[R]]; 1352 ; CHECK: ret; 1353 define <2 x half> @test_trunc(<2 x half> %a) #0 { 1354 %r = call <2 x half> @llvm.trunc.f16(<2 x half> %a) 1355 ret <2 x half> %r 1356 } 1357 ; llvm.rint on <2 x half>: each lane is expected to be converted with cvt.rni.f16.f16 and the two results repacked. 1358 ; CHECK-LABEL: test_rint( 1359 ; CHECK: ld.param.b32 [[A:%hh[0-9]+]], [test_rint_param_0]; 1360 ; CHECK-DAG: mov.b32 {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[A]]; 1361 ; CHECK-DAG: cvt.rni.f16.f16 [[R1:%h[0-9]+]], [[A1]]; 1362 ; CHECK-DAG: cvt.rni.f16.f16 [[R0:%h[0-9]+]], [[A0]]; 1363 ; CHECK: mov.b32 [[R:%hh[0-9]+]], {[[R0]], [[R1]]} 1364 ; CHECK: st.param.b32 [func_retval0+0], [[R]]; 1365 ; CHECK: ret; 1366 define <2 x half> @test_rint(<2 x half> %a) #0 { 1367 %r = call <2 x half> @llvm.rint.f16(<2 x half> %a) 1368 ret <2 x half> %r 1369 } 1370 ; llvm.nearbyint on <2 x half>: each lane is expected to be converted with cvt.rni.f16.f16 and the two results repacked. 1371 ; CHECK-LABEL: test_nearbyint( 1372 ; CHECK: ld.param.b32 [[A:%hh[0-9]+]], [test_nearbyint_param_0]; 1373 ; CHECK-DAG: mov.b32 {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[A]]; 1374 ; CHECK-DAG: cvt.rni.f16.f16 [[R1:%h[0-9]+]], [[A1]]; 1375 ; CHECK-DAG: cvt.rni.f16.f16 [[R0:%h[0-9]+]], [[A0]]; 1376 ; CHECK: mov.b32 [[R:%hh[0-9]+]], {[[R0]], [[R1]]} 1377 ; CHECK: st.param.b32 [func_retval0+0], [[R]]; 1378 ; CHECK: ret; 1379 define <2 x half> @test_nearbyint(<2 x half> %a) #0 { 1380 %r = call <2 x half> @llvm.nearbyint.f16(<2 x half> %a) 1381 ret <2 x half> %r 1382 } 1383 ; llvm.round on <2 x half>: the expected per-lane conversion is cvt.rni.f16.f16, the same one expected for rint and nearbyint. NOTE(review): LangRef describes llvm.round as rounding halfway cases away from zero, whereas PTX .rni picks the even integer on ties - confirm this expectation is intentional. 1384 ; CHECK-LABEL: test_round( 1385 ; CHECK: ld.param.b32 [[A:%hh[0-9]+]], [test_round_param_0]; 1386 ; CHECK-DAG: mov.b32 {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[A]]; 1387 ; CHECK-DAG: cvt.rni.f16.f16 [[R1:%h[0-9]+]], [[A1]]; 1388 ; CHECK-DAG: cvt.rni.f16.f16 [[R0:%h[0-9]+]], [[A0]]; 1389 ; CHECK: mov.b32 [[R:%hh[0-9]+]], {[[R0]], [[R1]]} 1390 ; CHECK: st.param.b32 [func_retval0+0], [[R]]; 1391 ; CHECK: ret;
1392 define <2 x half> @test_round(<2 x half> %a) #0 { 1393 %r = call <2 x half> @llvm.round.f16(<2 x half> %a) 1394 ret <2 x half> %r 1395 } 1396 ; llvm.fmuladd on <2 x half>: with native f16 math a single fma.rn.f16x2 is expected; otherwise both lanes of all three operands are widened to f32, combined per lane with fma.rn.f32, narrowed back to f16 and repacked. Lane 1 of the third operand is bound to FC1 so that the second fma.rn.f32 refers to a defined variable. 1397 ; CHECK-LABEL: test_fmuladd( 1398 ; CHECK-DAG: ld.param.b32 [[A:%hh[0-9]+]], [test_fmuladd_param_0]; 1399 ; CHECK-DAG: ld.param.b32 [[B:%hh[0-9]+]], [test_fmuladd_param_1]; 1400 ; CHECK-DAG: ld.param.b32 [[C:%hh[0-9]+]], [test_fmuladd_param_2]; 1401 ; 1402 ; CHECK-F16: fma.rn.f16x2 [[R:%hh[0-9]+]], [[A]], [[B]], [[C]]; 1403 ; 1404 ; CHECK-NOF16-DAG: mov.b32 {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[A]] 1405 ; CHECK-NOF16-DAG: mov.b32 {[[B0:%h[0-9]+]], [[B1:%h[0-9]+]]}, [[B]] 1406 ; CHECK-NOF16-DAG: mov.b32 {[[C0:%h[0-9]+]], [[C1:%h[0-9]+]]}, [[C]] 1407 ; CHECK-NOF16-DAG: cvt.f32.f16 [[FA0:%f[0-9]+]], [[A0]] 1408 ; CHECK-NOF16-DAG: cvt.f32.f16 [[FB0:%f[0-9]+]], [[B0]] 1409 ; CHECK-NOF16-DAG: cvt.f32.f16 [[FC0:%f[0-9]+]], [[C0]] 1410 ; CHECK-NOF16-DAG: cvt.f32.f16 [[FA1:%f[0-9]+]], [[A1]] 1411 ; CHECK-NOF16-DAG: cvt.f32.f16 [[FB1:%f[0-9]+]], [[B1]] 1412 ; CHECK-NOF16-DAG: cvt.f32.f16 [[FC1:%f[0-9]+]], [[C1]] 1413 ; CHECK-NOF16-DAG: fma.rn.f32 [[FR0:%f[0-9]+]], [[FA0]], [[FB0]], [[FC0]]; 1414 ; CHECK-NOF16-DAG: fma.rn.f32 [[FR1:%f[0-9]+]], [[FA1]], [[FB1]], [[FC1]]; 1415 ; CHECK-NOF16-DAG: cvt.rn.f16.f32 [[R0:%h[0-9]+]], [[FR0]] 1416 ; CHECK-NOF16-DAG: cvt.rn.f16.f32 [[R1:%h[0-9]+]], [[FR1]] 1417 ; CHECK-NOF16: mov.b32 [[R:%hh[0-9]+]], {[[R0]], [[R1]]} 1418 ; 1419 ; CHECK: st.param.b32 [func_retval0+0], [[R]]; 1420 ; CHECK: ret; 1421 define <2 x half> @test_fmuladd(<2 x half> %a, <2 x half> %b, <2 x half> %c) #0 { 1422 %r = call <2 x half> @llvm.fmuladd.f16(<2 x half> %a, <2 x half> %b, <2 x half> %c) 1423 ret <2 x half> %r 1424 } 1425 ; shufflevector with mask <1, 0> swaps the two lanes: the expected code unpacks %hh1 into %h1/%h2 and repacks them in the opposite order. 1426 ; CHECK-LABEL: test_shufflevector( 1427 ; CHECK: mov.b32 {%h1, %h2}, %hh1; 1428 ; CHECK: mov.b32 %hh2, {%h2, %h1}; 1429 define <2 x half> @test_shufflevector(<2 x half> %a) #0 { 1430 %s = shufflevector <2 x half> %a, <2 x half> undef, <2 x i32> <i32 1, i32 0> 1431 ret <2 x half> %s 1432
} 1433 1434 ; CHECK-LABEL: test_insertelement( 1435 ; CHECK: mov.b32 {%h2, %tmp_hi}, %hh1; 1436 ; CHECK: mov.b32 %hh2, {%h2, %h1}; 1437 define <2 x half> @test_insertelement(<2 x half> %a, half %x) #0 { 1438 %i = insertelement <2 x half> %a, half %x, i64 1 1439 ret <2 x half> %i 1440 } 1441 ; Attribute groups: #0 (nounwind) is the group attached to the function definitions above; #1 sets "unsafe-fp-math" to "true" and has no visible use in this part of the file. 1442 attributes #0 = { nounwind } 1443 attributes #1 = { "unsafe-fp-math" = "true" } 1444