; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s
; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s

; Code generation test for the half type and vectors of half on amdgcn.
; The file is compiled twice (see the two run lines above); directives using
; the shared prefix are verified for both runs, directives using a per-target
; prefix are verified for one run only.
;
; Every function below takes an addrspace(1) output pointer, plus either a
; half/float value argument or an addrspace(1) input pointer.

;===------------------------------------------------------------------------===
; half and half-vector arguments stored unchanged
;===------------------------------------------------------------------------===

; half args should be promoted to float

; GCN-LABEL: {{^}}load_f16_arg:
; GCN: s_load_dword [[ARG:s[0-9]+]]
; GCN: v_cvt_f16_f32_e32 [[CVT:v[0-9]+]], [[ARG]]
; GCN: buffer_store_short [[CVT]]
define void @load_f16_arg(half addrspace(1)* %out, half %arg) #0 {
  store half %arg, half addrspace(1)* %out
  ret void
}

; Each element is expected to be loaded and stored as a separate 16-bit access.
; GCN-LABEL: {{^}}load_v2f16_arg:
; GCN-DAG: buffer_load_ushort [[V0:v[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0 offset:44
; GCN-DAG: buffer_load_ushort [[V1:v[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0 offset:46
; GCN-DAG: buffer_store_short [[V0]], s{{\[[0-9]+:[0-9]+\]}}, 0{{$}}
; GCN-DAG: buffer_store_short [[V1]], s{{\[[0-9]+:[0-9]+\]}}, 0 offset:2{{$}}
; GCN: s_endpgm
define void @load_v2f16_arg(<2 x half> addrspace(1)* %out, <2 x half> %arg) #0 {
  store <2 x half> %arg, <2 x half> addrspace(1)* %out
  ret void
}

; Exactly three 16-bit loads, then one dword store and one short store.
; GCN-LABEL: {{^}}load_v3f16_arg:
; GCN: buffer_load_ushort
; GCN: buffer_load_ushort
; GCN: buffer_load_ushort
; GCN-NOT: buffer_load
; GCN-DAG: buffer_store_dword
; GCN-DAG: buffer_store_short
; GCN-NOT: buffer_store
; GCN: s_endpgm
define void @load_v3f16_arg(<3 x half> addrspace(1)* %out, <3 x half> %arg) #0 {
  store <3 x half> %arg, <3 x half> addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}load_v4f16_arg:
; GCN: buffer_load_ushort
; GCN: buffer_load_ushort
; GCN: buffer_load_ushort
; GCN: buffer_load_ushort
; GCN: buffer_store_short
; GCN: buffer_store_short
; GCN: buffer_store_short
; GCN: buffer_store_short
; GCN: s_endpgm
define void @load_v4f16_arg(<4 x half> addrspace(1)* %out, <4 x half> %arg) #0 {
  store <4 x half> %arg, <4 x half> addrspace(1)* %out
  ret void
}

; Only the label is checked here; the emitted instructions are not verified.
; GCN-LABEL: {{^}}load_v8f16_arg:
define void @load_v8f16_arg(<8 x half> addrspace(1)* %out, <8 x half> %arg) #0 {
  store <8 x half> %arg, <8 x half> addrspace(1)* %out
  ret void
}

;===------------------------------------------------------------------------===
; half and half-vector arguments extended to float
;===------------------------------------------------------------------------===

; Only the label is checked here; the emitted instructions are not verified.
; GCN-LABEL: {{^}}extload_v2f16_arg:
define void @extload_v2f16_arg(<2 x float> addrspace(1)* %out, <2 x half> %in) #0 {
  %fpext = fpext <2 x half> %in to <2 x float>
  store <2 x float> %fpext, <2 x float> addrspace(1)* %out
  ret void
}

; Only the label is checked here; the emitted instructions are not verified.
; GCN-LABEL: {{^}}extload_f16_to_f32_arg:
define void @extload_f16_to_f32_arg(float addrspace(1)* %out, half %arg) #0 {
  %ext = fpext half %arg to float
  store float %ext, float addrspace(1)* %out
  ret void
}

; Only the label is checked here; the emitted instructions are not verified.
; GCN-LABEL: {{^}}extload_v2f16_to_v2f32_arg:
define void @extload_v2f16_to_v2f32_arg(<2 x float> addrspace(1)* %out, <2 x half> %arg) #0 {
  %ext = fpext <2 x half> %arg to <2 x float>
  store <2 x float> %ext, <2 x float> addrspace(1)* %out
  ret void
}

; Exactly three 16-bit loads and exactly three half-to-float conversions.
; GCN-LABEL: {{^}}extload_v3f16_to_v3f32_arg:
; GCN: buffer_load_ushort
; GCN: buffer_load_ushort
; GCN: buffer_load_ushort
; GCN-NOT: buffer_load
; GCN: v_cvt_f32_f16_e32
; GCN: v_cvt_f32_f16_e32
; GCN: v_cvt_f32_f16_e32
; GCN-NOT: v_cvt_f32_f16
; GCN-DAG: buffer_store_dword
; GCN-DAG: buffer_store_dwordx2
; GCN: s_endpgm
define void @extload_v3f16_to_v3f32_arg(<3 x float> addrspace(1)* %out, <3 x half> %arg) #0 {
  %ext = fpext <3 x half> %arg to <3 x float>
  store <3 x float> %ext, <3 x float> addrspace(1)* %out
  ret void
}

; Only the label is checked here; the emitted instructions are not verified.
; GCN-LABEL: {{^}}extload_v4f16_to_v4f32_arg:
define void @extload_v4f16_to_v4f32_arg(<4 x float> addrspace(1)* %out, <4 x half> %arg) #0 {
  %ext = fpext <4 x half> %arg to <4 x float>
  store <4 x float> %ext, <4 x float> addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}extload_v8f16_to_v8f32_arg:
; GCN: buffer_load_ushort
; GCN: buffer_load_ushort
; GCN: buffer_load_ushort
; GCN: buffer_load_ushort
; GCN: buffer_load_ushort
; GCN: buffer_load_ushort
; GCN: buffer_load_ushort
; GCN: buffer_load_ushort

; GCN: v_cvt_f32_f16_e32
; GCN: v_cvt_f32_f16_e32
; GCN: v_cvt_f32_f16_e32
; GCN: v_cvt_f32_f16_e32
; GCN: v_cvt_f32_f16_e32
; GCN: v_cvt_f32_f16_e32
; GCN: v_cvt_f32_f16_e32
; GCN: v_cvt_f32_f16_e32

; GCN: buffer_store_dwordx4
; GCN: buffer_store_dwordx4
define void @extload_v8f16_to_v8f32_arg(<8 x float> addrspace(1)* %out, <8 x half> %arg) #0 {
  %ext = fpext <8 x half> %arg to <8 x float>
  store <8 x float> %ext, <8 x float> addrspace(1)* %out
  ret void
}

;===------------------------------------------------------------------------===
; half and half-vector arguments extended to double
;===------------------------------------------------------------------------===

; The expected scalar-load offset differs between the two runs (0xb vs 0x2c).
; GCN-LABEL: {{^}}extload_f16_to_f64_arg:
; SI: s_load_dword [[ARG:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xb{{$}}
; VI: s_load_dword [[ARG:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0x2c{{$}}
; GCN: v_cvt_f64_f32_e32 [[RESULT:v\[[0-9]+:[0-9]+\]]], [[ARG]]
; GCN: buffer_store_dwordx2 [[RESULT]]
define void @extload_f16_to_f64_arg(double addrspace(1)* %out, half %arg) #0 {
  %ext = fpext half %arg to double
  store double %ext, double addrspace(1)* %out
  ret void
}

; Each element is expected to go half -> float -> double.
; GCN-LABEL: {{^}}extload_v2f16_to_v2f64_arg:
; GCN-DAG: buffer_load_ushort v
; GCN-DAG: buffer_load_ushort v
; GCN-DAG: v_cvt_f32_f16_e32
; GCN-DAG: v_cvt_f32_f16_e32
; GCN-DAG: v_cvt_f64_f32_e32
; GCN-DAG: v_cvt_f64_f32_e32
; GCN: s_endpgm
define void @extload_v2f16_to_v2f64_arg(<2 x double> addrspace(1)* %out, <2 x half> %arg) #0 {
  %ext = fpext <2 x half> %arg to <2 x double>
  store <2 x double> %ext, <2 x double> addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}extload_v3f16_to_v3f64_arg:
; GCN-DAG: buffer_load_ushort v
; GCN-DAG: buffer_load_ushort v
; GCN-DAG: buffer_load_ushort v
; GCN-DAG: v_cvt_f32_f16_e32
; GCN-DAG: v_cvt_f32_f16_e32
; GCN-DAG: v_cvt_f32_f16_e32
; GCN-DAG: v_cvt_f64_f32_e32
; GCN-DAG: v_cvt_f64_f32_e32
; GCN-DAG: v_cvt_f64_f32_e32
; GCN: s_endpgm
define void @extload_v3f16_to_v3f64_arg(<3 x double> addrspace(1)* %out, <3 x half> %arg) #0 {
  %ext = fpext <3 x half> %arg to <3 x double>
  store <3 x double> %ext, <3 x double> addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}extload_v4f16_to_v4f64_arg:
; GCN-DAG: buffer_load_ushort v
; GCN-DAG: buffer_load_ushort v
; GCN-DAG: buffer_load_ushort v
; GCN-DAG: buffer_load_ushort v
; GCN-DAG: v_cvt_f32_f16_e32
; GCN-DAG: v_cvt_f32_f16_e32
; GCN-DAG: v_cvt_f32_f16_e32
; GCN-DAG: v_cvt_f32_f16_e32
; GCN-DAG: v_cvt_f64_f32_e32
; GCN-DAG: v_cvt_f64_f32_e32
; GCN-DAG: v_cvt_f64_f32_e32
; GCN-DAG: v_cvt_f64_f32_e32
; GCN: s_endpgm
define void @extload_v4f16_to_v4f64_arg(<4 x double> addrspace(1)* %out, <4 x half> %arg) #0 {
  %ext = fpext <4 x half> %arg to <4 x double>
  store <4 x double> %ext, <4 x double> addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}extload_v8f16_to_v8f64_arg:
; GCN-DAG: buffer_load_ushort v
; GCN-DAG: buffer_load_ushort v
; GCN-DAG: buffer_load_ushort v
; GCN-DAG: buffer_load_ushort v

; GCN-DAG: buffer_load_ushort v
; GCN-DAG: buffer_load_ushort v
; GCN-DAG: buffer_load_ushort v
; GCN-DAG: buffer_load_ushort v

; GCN-DAG: v_cvt_f32_f16_e32
; GCN-DAG: v_cvt_f32_f16_e32
; GCN-DAG: v_cvt_f32_f16_e32
; GCN-DAG: v_cvt_f32_f16_e32

; GCN-DAG: v_cvt_f32_f16_e32
; GCN-DAG: v_cvt_f32_f16_e32
; GCN-DAG: v_cvt_f32_f16_e32
; GCN-DAG: v_cvt_f32_f16_e32

; GCN-DAG: v_cvt_f64_f32_e32
; GCN-DAG: v_cvt_f64_f32_e32
; GCN-DAG: v_cvt_f64_f32_e32
; GCN-DAG: v_cvt_f64_f32_e32

; GCN-DAG: v_cvt_f64_f32_e32
; GCN-DAG: v_cvt_f64_f32_e32
; GCN-DAG: v_cvt_f64_f32_e32
; GCN-DAG: v_cvt_f64_f32_e32

; GCN: s_endpgm
define void @extload_v8f16_to_v8f64_arg(<8 x double> addrspace(1)* %out, <8 x half> %arg) #0 {
  %ext = fpext <8 x half> %arg to <8 x double>
  store <8 x double> %ext, <8 x double> addrspace(1)* %out
  ret void
}

;===------------------------------------------------------------------------===
; load from one addrspace(1) pointer, store unchanged to another
;===------------------------------------------------------------------------===

; GCN-LABEL: {{^}}global_load_store_f16:
; GCN: buffer_load_ushort [[TMP:v[0-9]+]]
; GCN: buffer_store_short [[TMP]]
define void @global_load_store_f16(half addrspace(1)* %out, half addrspace(1)* %in) #0 {
  %val = load half, half addrspace(1)* %in
  store half %val, half addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}global_load_store_v2f16:
; GCN: buffer_load_dword [[TMP:v[0-9]+]]
; GCN: buffer_store_dword [[TMP]]
define void @global_load_store_v2f16(<2 x half> addrspace(1)* %out, <2 x half> addrspace(1)* %in) #0 {
  %val = load <2 x half>, <2 x half> addrspace(1)* %in
  store <2 x half> %val, <2 x half> addrspace(1)* %out
  ret void
}

; Note: unlike its siblings, this function declares its parameters in
; (%in, %out) order.
; GCN-LABEL: {{^}}global_load_store_v4f16:
; GCN: buffer_load_dwordx2 [[TMP:v\[[0-9]+:[0-9]+\]]]
; GCN: buffer_store_dwordx2 [[TMP]]
define void @global_load_store_v4f16(<4 x half> addrspace(1)* %in, <4 x half> addrspace(1)* %out) #0 {
  %val = load <4 x half>, <4 x half> addrspace(1)* %in
  store <4 x half> %val, <4 x half> addrspace(1)* %out
  ret void
}

; The store check reuses the register tuple captured by the load check, so the
; test verifies that the loaded value is what gets stored (matching the f16,
; v2f16 and v4f16 variants above).
; GCN-LABEL: {{^}}global_load_store_v8f16:
; GCN: buffer_load_dwordx4 [[TMP:v\[[0-9]+:[0-9]+\]]]
; GCN: buffer_store_dwordx4 [[TMP]]
; GCN: s_endpgm
define void @global_load_store_v8f16(<8 x half> addrspace(1)* %out, <8 x half> addrspace(1)* %in) #0 {
  %val = load <8 x half>, <8 x half> addrspace(1)* %in
  store <8 x half> %val, <8 x half> addrspace(1)* %out
  ret void
}

;===------------------------------------------------------------------------===
; load half / half vector, extend to float, store
;===------------------------------------------------------------------------===

; GCN-LABEL: {{^}}global_extload_f16_to_f32:
; GCN: buffer_load_ushort [[LOAD:v[0-9]+]]
; GCN: v_cvt_f32_f16_e32 [[CVT:v[0-9]+]], [[LOAD]]
; GCN: buffer_store_dword [[CVT]]
define void @global_extload_f16_to_f32(float addrspace(1)* %out, half addrspace(1)* %in) #0 {
  %val = load half, half addrspace(1)* %in
  %cvt = fpext half %val to float
  store float %cvt, float addrspace(1)* %out
  ret void
}

; The two converted values are expected to form the register pair that is
; written by a single dwordx2 store.
; GCN-LABEL: {{^}}global_extload_v2f16_to_v2f32:
; GCN-DAG: buffer_load_ushort [[LOAD0:v[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0{{$}}
; GCN-DAG: buffer_load_ushort [[LOAD1:v[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0 offset:2{{$}}
; GCN-DAG: v_cvt_f32_f16_e32 v[[CVT0:[0-9]+]], [[LOAD0]]
; GCN-DAG: v_cvt_f32_f16_e32 v[[CVT1:[0-9]+]], [[LOAD1]]
; GCN-DAG: buffer_store_dwordx2 v{{\[}}[[CVT0]]:[[CVT1]]{{\]}}
; GCN: s_endpgm
define void @global_extload_v2f16_to_v2f32(<2 x float> addrspace(1)* %out, <2 x half> addrspace(1)* %in) #0 {
  %val = load <2 x half>, <2 x half> addrspace(1)* %in
  %cvt = fpext <2 x half> %val to <2 x float>
  store <2 x float> %cvt, <2 x float> addrspace(1)* %out
  ret void
}

; Only the label is checked here; the emitted instructions are not verified.
; GCN-LABEL: {{^}}global_extload_v3f16_to_v3f32:
define void @global_extload_v3f16_to_v3f32(<3 x float> addrspace(1)* %out, <3 x half> addrspace(1)* %in) #0 {
  %val = load <3 x half>, <3 x half> addrspace(1)* %in
  %cvt = fpext <3 x half> %val to <3 x float>
  store <3 x float> %cvt, <3 x float> addrspace(1)* %out
  ret void
}

; Only the label is checked here; the emitted instructions are not verified.
; GCN-LABEL: {{^}}global_extload_v4f16_to_v4f32:
define void @global_extload_v4f16_to_v4f32(<4 x float> addrspace(1)* %out, <4 x half> addrspace(1)* %in) #0 {
  %val = load <4 x half>, <4 x half> addrspace(1)* %in
  %cvt = fpext <4 x half> %val to <4 x float>
  store <4 x float> %cvt, <4 x float> addrspace(1)* %out
  ret void
}

; Only the label is checked here; the emitted instructions are not verified.
; GCN-LABEL: {{^}}global_extload_v8f16_to_v8f32:
define void @global_extload_v8f16_to_v8f32(<8 x float> addrspace(1)* %out, <8 x half> addrspace(1)* %in) #0 {
  %val = load <8 x half>, <8 x half> addrspace(1)* %in
  %cvt = fpext <8 x half> %val to <8 x float>
  store <8 x float> %cvt, <8 x float> addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}global_extload_v16f16_to_v16f32:
; GCN: buffer_load_ushort
; GCN: buffer_load_ushort
; GCN: buffer_load_ushort
; GCN: buffer_load_ushort
; GCN: buffer_load_ushort
; GCN: buffer_load_ushort
; GCN: buffer_load_ushort
; GCN: buffer_load_ushort
; GCN: buffer_load_ushort
; GCN: buffer_load_ushort
; GCN: buffer_load_ushort
; GCN: buffer_load_ushort
; GCN: buffer_load_ushort
; GCN: buffer_load_ushort
; GCN: buffer_load_ushort
; GCN: buffer_load_ushort

; GCN: v_cvt_f32_f16_e32
; GCN: v_cvt_f32_f16_e32
; GCN: v_cvt_f32_f16_e32
; GCN: v_cvt_f32_f16_e32
; GCN: v_cvt_f32_f16_e32
; GCN: v_cvt_f32_f16_e32
; GCN: v_cvt_f32_f16_e32
; GCN: v_cvt_f32_f16_e32
; GCN: v_cvt_f32_f16_e32
; GCN: v_cvt_f32_f16_e32
; GCN: v_cvt_f32_f16_e32
; GCN: v_cvt_f32_f16_e32
; GCN: v_cvt_f32_f16_e32
; GCN: v_cvt_f32_f16_e32
; GCN: v_cvt_f32_f16_e32
; GCN: v_cvt_f32_f16_e32

; GCN: buffer_store_dwordx4
; GCN: buffer_store_dwordx4
; GCN: buffer_store_dwordx4
; GCN: buffer_store_dwordx4

; GCN: s_endpgm
define void @global_extload_v16f16_to_v16f32(<16 x float> addrspace(1)* %out, <16 x half> addrspace(1)* %in) #0 {
  %val = load <16 x half>, <16 x half> addrspace(1)* %in
  %cvt = fpext <16 x half> %val to <16 x float>
  store <16 x float> %cvt, <16 x float> addrspace(1)* %out
  ret void
}

;===------------------------------------------------------------------------===
; load half / half vector, extend to double, store
;===------------------------------------------------------------------------===

; GCN-LABEL: {{^}}global_extload_f16_to_f64:
; GCN: buffer_load_ushort [[LOAD:v[0-9]+]]
; GCN: v_cvt_f32_f16_e32 [[CVT0:v[0-9]+]], [[LOAD]]
; GCN: v_cvt_f64_f32_e32 [[CVT1:v\[[0-9]+:[0-9]+\]]], [[CVT0]]
; GCN: buffer_store_dwordx2 [[CVT1]]
define void @global_extload_f16_to_f64(double addrspace(1)* %out, half addrspace(1)* %in) #0 {
  %val = load half, half addrspace(1)* %in
  %cvt = fpext half %val to double
  store double %cvt, double addrspace(1)* %out
  ret void
}

; The stored tuple is expected to span from the low register of the first
; converted double to the high register of the second.
; GCN-LABEL: {{^}}global_extload_v2f16_to_v2f64:
; GCN-DAG: buffer_load_ushort [[LOAD0:v[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0{{$}}
; GCN-DAG: buffer_load_ushort [[LOAD1:v[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0 offset:2{{$}}
; GCN-DAG: v_cvt_f32_f16_e32 v[[CVT0:[0-9]+]], [[LOAD0]]
; GCN-DAG: v_cvt_f32_f16_e32 v[[CVT1:[0-9]+]], [[LOAD1]]
; GCN-DAG: v_cvt_f64_f32_e32 v{{\[}}[[CVT2_LO:[0-9]+]]:[[CVT2_HI:[0-9]+]]{{\]}}, v[[CVT0]]
; GCN-DAG: v_cvt_f64_f32_e32 v{{\[}}[[CVT3_LO:[0-9]+]]:[[CVT3_HI:[0-9]+]]{{\]}}, v[[CVT1]]
; GCN-DAG: buffer_store_dwordx4 v{{\[}}[[CVT2_LO]]:[[CVT3_HI]]{{\]}}
; GCN: s_endpgm
define void @global_extload_v2f16_to_v2f64(<2 x double> addrspace(1)* %out, <2 x half> addrspace(1)* %in) #0 {
  %val = load <2 x half>, <2 x half> addrspace(1)* %in
  %cvt = fpext <2 x half> %val to <2 x double>
  store <2 x double> %cvt, <2 x double> addrspace(1)* %out
  ret void
}

; The 64-bit shift mnemonic and operand order differ between the two runs.
; Exactly three conversions of each kind are expected.
; GCN-LABEL: {{^}}global_extload_v3f16_to_v3f64:

; GCN: buffer_load_dwordx2 [[LOAD:v\[[0-9]+:[0-9]+\]]]
; SI: v_lshr_b64 v{{\[[0-9]+:[0-9]+\]}}, [[LOAD]], 32
; VI: v_lshrrev_b64 v{{\[[0-9]+:[0-9]+\]}}, 32, [[LOAD]]
; GCN: v_lshrrev_b32_e32 {{v[0-9]+}}, 16, {{v[0-9]+}}

; GCN: v_cvt_f32_f16_e32
; GCN: v_cvt_f32_f16_e32
; GCN: v_cvt_f32_f16_e32
; GCN-NOT: v_cvt_f32_f16_e32

; GCN: v_cvt_f64_f32_e32
; GCN: v_cvt_f64_f32_e32
; GCN: v_cvt_f64_f32_e32
; GCN-NOT: v_cvt_f64_f32_e32

; GCN-DAG: buffer_store_dwordx4 v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0{{$}}
; GCN-DAG: buffer_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 offset:16
; GCN: s_endpgm
define void @global_extload_v3f16_to_v3f64(<3 x double> addrspace(1)* %out, <3 x half> addrspace(1)* %in) #0 {
  %val = load <3 x half>, <3 x half> addrspace(1)* %in
  %cvt = fpext <3 x half> %val to <3 x double>
  store <3 x double> %cvt, <3 x double> addrspace(1)* %out
  ret void
}

; Only the label is checked here; the emitted instructions are not verified.
; GCN-LABEL: {{^}}global_extload_v4f16_to_v4f64:
define void @global_extload_v4f16_to_v4f64(<4 x double> addrspace(1)* %out, <4 x half> addrspace(1)* %in) #0 {
  %val = load <4 x half>, <4 x half> addrspace(1)* %in
  %cvt = fpext <4 x half> %val to <4 x double>
  store <4 x double> %cvt, <4 x double> addrspace(1)* %out
  ret void
}

; Only the label is checked here; the emitted instructions are not verified.
; GCN-LABEL: {{^}}global_extload_v8f16_to_v8f64:
define void @global_extload_v8f16_to_v8f64(<8 x double> addrspace(1)* %out, <8 x half> addrspace(1)* %in) #0 {
  %val = load <8 x half>, <8 x half> addrspace(1)* %in
  %cvt = fpext <8 x half> %val to <8 x double>
  store <8 x double> %cvt, <8 x double> addrspace(1)* %out
  ret void
}

; Only the label is checked here; the emitted instructions are not verified.
; GCN-LABEL: {{^}}global_extload_v16f16_to_v16f64:
define void @global_extload_v16f16_to_v16f64(<16 x double> addrspace(1)* %out, <16 x half> addrspace(1)* %in) #0 {
  %val = load <16 x half>, <16 x half> addrspace(1)* %in
  %cvt = fpext <16 x half> %val to <16 x double>
  store <16 x double> %cvt, <16 x double> addrspace(1)* %out
  ret void
}

;===------------------------------------------------------------------------===
; load float / float vector, truncate to half, store
;===------------------------------------------------------------------------===

; GCN-LABEL: {{^}}global_truncstore_f32_to_f16:
; GCN: buffer_load_dword [[LOAD:v[0-9]+]]
; GCN: v_cvt_f16_f32_e32 [[CVT:v[0-9]+]], [[LOAD]]
; GCN: buffer_store_short [[CVT]]
define void @global_truncstore_f32_to_f16(half addrspace(1)* %out, float addrspace(1)* %in) #0 {
  %val = load float, float addrspace(1)* %in
  %cvt = fptrunc float %val to half
  store half %cvt, half addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}global_truncstore_v2f32_to_v2f16:
; GCN: buffer_load_dwordx2 v{{\[}}[[LO:[0-9]+]]:[[HI:[0-9]+]]{{\]}}
; GCN-DAG: v_cvt_f16_f32_e32 [[CVT0:v[0-9]+]], v[[LO]]
; GCN-DAG: v_cvt_f16_f32_e32 [[CVT1:v[0-9]+]], v[[HI]]
; GCN-DAG: buffer_store_short [[CVT0]]
; GCN-DAG: buffer_store_short [[CVT1]]
; GCN: s_endpgm
define void @global_truncstore_v2f32_to_v2f16(<2 x half> addrspace(1)* %out, <2 x float> addrspace(1)* %in) #0 {
  %val = load <2 x float>, <2 x float> addrspace(1)* %in
  %cvt = fptrunc <2 x float> %val to <2 x half>
  store <2 x half> %cvt, <2 x half> addrspace(1)* %out
  ret void
}

; Exactly three float-to-half conversions are expected.
; GCN-LABEL: {{^}}global_truncstore_v3f32_to_v3f16:
; GCN: buffer_load_dwordx4
; GCN: v_cvt_f16_f32_e32
; GCN: v_cvt_f16_f32_e32
; GCN: v_cvt_f16_f32_e32
; GCN-NOT: v_cvt_f16_f32_e32
; GCN: buffer_store_short
; GCN: buffer_store_dword
; GCN: s_endpgm
define void @global_truncstore_v3f32_to_v3f16(<3 x half> addrspace(1)* %out, <3 x float> addrspace(1)* %in) #0 {
  %val = load <3 x float>, <3 x float> addrspace(1)* %in
  %cvt = fptrunc <3 x float> %val to <3 x half>
  store <3 x half> %cvt, <3 x half> addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}global_truncstore_v4f32_to_v4f16:
; GCN: buffer_load_dwordx4
; GCN: v_cvt_f16_f32_e32
; GCN: v_cvt_f16_f32_e32
; GCN: v_cvt_f16_f32_e32
; GCN: v_cvt_f16_f32_e32
; GCN: buffer_store_short
; GCN: buffer_store_short
; GCN: buffer_store_short
; GCN: buffer_store_short
; GCN: s_endpgm
define void @global_truncstore_v4f32_to_v4f16(<4 x half> addrspace(1)* %out, <4 x float> addrspace(1)* %in) #0 {
  %val = load <4 x float>, <4 x float> addrspace(1)* %in
  %cvt = fptrunc <4 x float> %val to <4 x half>
  store <4 x half> %cvt, <4 x half> addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}global_truncstore_v8f32_to_v8f16:
; GCN: buffer_load_dwordx4
; GCN: buffer_load_dwordx4
; GCN: v_cvt_f16_f32_e32
; GCN: v_cvt_f16_f32_e32
; GCN: v_cvt_f16_f32_e32
; GCN: v_cvt_f16_f32_e32
; GCN: v_cvt_f16_f32_e32
; GCN: v_cvt_f16_f32_e32
; GCN: v_cvt_f16_f32_e32
; GCN: v_cvt_f16_f32_e32
; GCN: buffer_store_short
; GCN: buffer_store_short
; GCN: buffer_store_short
; GCN: buffer_store_short
; GCN: buffer_store_short
; GCN: buffer_store_short
; GCN: buffer_store_short
; GCN: buffer_store_short
; GCN: s_endpgm
define void @global_truncstore_v8f32_to_v8f16(<8 x half> addrspace(1)* %out, <8 x float> addrspace(1)* %in) #0 {
  %val = load <8 x float>, <8 x float> addrspace(1)* %in
  %cvt = fptrunc <8 x float> %val to <8 x half>
  store <8 x half> %cvt, <8 x half> addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}global_truncstore_v16f32_to_v16f16:
; GCN: buffer_load_dwordx4
; GCN: buffer_load_dwordx4
; GCN: buffer_load_dwordx4
; GCN: buffer_load_dwordx4
; GCN-DAG: v_cvt_f16_f32_e32
; GCN-DAG: v_cvt_f16_f32_e32
; GCN-DAG: v_cvt_f16_f32_e32
; GCN-DAG: v_cvt_f16_f32_e32
; GCN-DAG: v_cvt_f16_f32_e32
; GCN-DAG: v_cvt_f16_f32_e32
; GCN-DAG: v_cvt_f16_f32_e32
; GCN-DAG: v_cvt_f16_f32_e32
; GCN-DAG: v_cvt_f16_f32_e32
; GCN-DAG: v_cvt_f16_f32_e32
; GCN-DAG: v_cvt_f16_f32_e32
; GCN-DAG: v_cvt_f16_f32_e32
; GCN-DAG: v_cvt_f16_f32_e32
; GCN-DAG: v_cvt_f16_f32_e32
; GCN-DAG: v_cvt_f16_f32_e32
; GCN-DAG: v_cvt_f16_f32_e32
; GCN-DAG: buffer_store_short
; GCN-DAG: buffer_store_short
; GCN-DAG: buffer_store_short
; GCN-DAG: buffer_store_short
; GCN-DAG: buffer_store_short
; GCN-DAG: buffer_store_short
; GCN-DAG: buffer_store_short
; GCN-DAG: buffer_store_short
; GCN-DAG: buffer_store_short
; GCN-DAG: buffer_store_short
; GCN-DAG: buffer_store_short
; GCN-DAG: buffer_store_short
; GCN-DAG: buffer_store_short
; GCN-DAG: buffer_store_short
; GCN-DAG: buffer_store_short
; GCN-DAG: buffer_store_short
; GCN: s_endpgm
define void @global_truncstore_v16f32_to_v16f16(<16 x half> addrspace(1)* %out, <16 x float> addrspace(1)* %in) #0 {
  %val = load <16 x float>, <16 x float> addrspace(1)* %in
  %cvt = fptrunc <16 x float> %val to <16 x half>
  store <16 x half> %cvt, <16 x half> addrspace(1)* %out
  ret void
}

;===------------------------------------------------------------------------===
; half arithmetic
;===------------------------------------------------------------------------===

; FIXME: Unsafe math should fold conversions away
; GCN-LABEL: {{^}}fadd_f16:
; SI-DAG: v_cvt_f32_f16_e32 v{{[0-9]+}},
; SI-DAG: v_cvt_f32_f16_e32 v{{[0-9]+}},
; SI-DAG: v_cvt_f32_f16_e32 v{{[0-9]+}},
; SI-DAG: v_cvt_f32_f16_e32 v{{[0-9]+}},
; SI: v_add_f32
; GCN: s_endpgm
define void @fadd_f16(half addrspace(1)* %out, half %a, half %b) #0 {
  %add = fadd half %a, %b
  store half %add, half addrspace(1)* %out, align 4
  ret void
}

; One float add per element is expected for the first run only.
; GCN-LABEL: {{^}}fadd_v2f16:
; SI: v_add_f32
; SI: v_add_f32
; GCN: s_endpgm
define void @fadd_v2f16(<2 x half> addrspace(1)* %out, <2 x half> %a, <2 x half> %b) #0 {
  %add = fadd <2 x half> %a, %b
  store <2 x half> %add, <2 x half> addrspace(1)* %out, align 8
  ret void
}

; Operands are two consecutive <4 x half> vectors read through %in.
; GCN-LABEL: {{^}}fadd_v4f16:
; SI: v_add_f32
; SI: v_add_f32
; SI: v_add_f32
; SI: v_add_f32
; GCN: s_endpgm
define void @fadd_v4f16(<4 x half> addrspace(1)* %out, <4 x half> addrspace(1)* %in) #0 {
  %b_ptr = getelementptr <4 x half>, <4 x half> addrspace(1)* %in, i32 1
  %a = load <4 x half>, <4 x half> addrspace(1)* %in, align 16
  %b = load <4 x half>, <4 x half> addrspace(1)* %b_ptr, align 16
  %result = fadd <4 x half> %a, %b
  store <4 x half> %result, <4 x half> addrspace(1)* %out, align 16
  ret void
}

; GCN-LABEL: {{^}}fadd_v8f16:
; SI: v_add_f32
; SI: v_add_f32
; SI: v_add_f32
; SI: v_add_f32
; SI: v_add_f32
; SI: v_add_f32
; SI: v_add_f32
; SI: v_add_f32
; GCN: s_endpgm
define void @fadd_v8f16(<8 x half> addrspace(1)* %out, <8 x half> %a, <8 x half> %b) #0 {
  %add = fadd <8 x half> %a, %b
  store <8 x half> %add, <8 x half> addrspace(1)* %out, align 32
  ret void
}

; Operands are two consecutive half values read through %in.
; GCN-LABEL: {{^}}fsub_f16:
; GCN: v_subrev_f32_e32
; GCN: s_endpgm
define void @fsub_f16(half addrspace(1)* %out, half addrspace(1)* %in) #0 {
  %b_ptr = getelementptr half, half addrspace(1)* %in, i32 1
  %a = load half, half addrspace(1)* %in
  %b = load half, half addrspace(1)* %b_ptr
  %sub = fsub half %a, %b
  store half %sub, half addrspace(1)* %out
  ret void
}

;===------------------------------------------------------------------------===
; bitcasts between half and i16
;===------------------------------------------------------------------------===

; The loaded register is expected to be stored directly, with no conversion.
; GCN-LABEL: {{^}}test_bitcast_from_half:
; GCN: buffer_load_ushort [[TMP:v[0-9]+]]
; GCN: buffer_store_short [[TMP]]
define void @test_bitcast_from_half(half addrspace(1)* %in, i16 addrspace(1)* %out) #0 {
  %val = load half, half addrspace(1)* %in
  %val_int = bitcast half %val to i16
  store i16 %val_int, i16 addrspace(1)* %out
  ret void
}

; The loaded register is expected to be stored directly, with no conversion.
; GCN-LABEL: {{^}}test_bitcast_to_half:
; GCN: buffer_load_ushort [[TMP:v[0-9]+]]
; GCN: buffer_store_short [[TMP]]
define void @test_bitcast_to_half(half addrspace(1)* %out, i16 addrspace(1)* %in) #0 {
  %val = load i16, i16 addrspace(1)* %in
  %val_fp = bitcast i16 %val to half
  store half %val_fp, half addrspace(1)* %out
  ret void
}

attributes #0 = { nounwind }