# RUN: llc --mtriple=amdgcn--amdhsa -mcpu=fiji -verify-machineinstrs -run-pass si-fold-operands,si-shrink-instructions %s -o - | FileCheck %s

# Exercises folding of a 32-bit constant materialized by V_MOV_B32 into f16
# and f32 VALU adds, running si-fold-operands followed by
# si-shrink-instructions. Each MIR function below varies the constant and the
# number / type of its users; the FileCheck patterns in front of each function
# pin the expected output.
#
# Constant used in this part of the file: 1065353216 = 0x3F800000 (f32 1.0).
--- |
  define amdgpu_kernel void @add_f32_1.0_one_f16_use() #0 {
    %f16.val0 = load volatile half, half addrspace(1)* undef
    %f16.val1 = load volatile half, half addrspace(1)* undef
    %f32.val = load volatile float, float addrspace(1)* undef
    %f16.add0 = fadd half %f16.val0, 0xH3C00
    %f32.add = fadd float %f32.val, 1.000000e+00
    store volatile half %f16.add0, half addrspace(1)* undef
    store volatile float %f32.add, float addrspace(1)* undef
    ret void
  }

  define amdgpu_kernel void @add_f32_1.0_multi_f16_use() #0 {
    %f16.val0 = load volatile half, half addrspace(1)* undef
    %f16.val1 = load volatile half, half addrspace(1)* undef
    %f32.val = load volatile float, float addrspace(1)* undef
    %f16.add0 = fadd half %f16.val0, 0xH3C00
    %f32.add = fadd float %f32.val, 1.000000e+00
    store volatile half %f16.add0, half addrspace(1)* undef
    store volatile float %f32.add, float addrspace(1)* undef
    ret void
  }

  define amdgpu_kernel void @add_f32_1.0_one_f32_use_one_f16_use () #0 {
    %f16.val0 = load volatile half, half addrspace(1)* undef
    %f16.val1 = load volatile half, half addrspace(1)* undef
    %f32.val = load volatile float, float addrspace(1)* undef
    %f16.add0 = fadd half %f16.val0, 0xH3C00
    %f32.add = fadd float %f32.val, 1.000000e+00
    store volatile half %f16.add0, half addrspace(1)* undef
    store volatile float %f32.add, float addrspace(1)* undef
    ret void
  }

  define amdgpu_kernel void @add_f32_1.0_one_f32_use_multi_f16_use () #0 {
    %f16.val0 = load volatile half, half addrspace(1)* undef
    %f16.val1 = load volatile half, half addrspace(1)* undef
    %f32.val = load volatile float, float addrspace(1)* undef
    %f16.add0 = fadd half %f16.val0, 0xH3C00
    %f16.add1 = fadd half %f16.val1, 0xH3C00
    %f32.add = fadd float %f32.val, 1.000000e+00
    store volatile half %f16.add0, half addrspace(1)* undef
    store volatile half %f16.add1, half addrspace(1)* undef
    store volatile float %f32.add, float addrspace(1)* undef
    ret void
  }

  define amdgpu_kernel void @add_i32_1_multi_f16_use() #0 {
    %f16.val0 = load volatile half, half addrspace(1)* undef
    %f16.val1 = load volatile half, half addrspace(1)* undef
    %f16.add0 = fadd half %f16.val0, 0xH0001
    %f16.add1 = fadd half %f16.val1, 0xH0001
    store volatile half %f16.add0, half addrspace(1)* undef
    store volatile half %f16.add1,half addrspace(1)* undef
    ret void
  }

  define amdgpu_kernel void @add_i32_m2_one_f32_use_multi_f16_use () #0 {
    %f16.val0 = load volatile half, half addrspace(1)* undef
    %f16.val1 = load volatile half, half addrspace(1)* undef
    %f32.val = load volatile float, float addrspace(1)* undef
    %f16.add0 = fadd half %f16.val0, 0xHFFFE
    %f16.add1 = fadd half %f16.val1, 0xHFFFE
    %f32.add = fadd float %f32.val, 0xffffffffc0000000
    store volatile half %f16.add0, half addrspace(1)* undef
    store volatile half %f16.add1, half addrspace(1)* undef
    store volatile float %f32.add, float addrspace(1)* undef
    ret void
  }

  define amdgpu_kernel void @add_f16_1.0_multi_f32_use() #0 {
    %f32.val0 = load volatile float, float addrspace(1)* undef
    %f32.val1 = load volatile float, float addrspace(1)* undef
    %f32.val = load volatile float, float addrspace(1)* undef
    %f32.add0 = fadd float %f32.val0, 1.0
    %f32.add1 = fadd float %f32.val1, 1.0
    store volatile float %f32.add0, float addrspace(1)* undef
    store volatile float %f32.add1, float addrspace(1)* undef
    ret void
  }

  define amdgpu_kernel void @add_f16_1.0_other_high_bits_multi_f16_use() #0 {
    %f16.val0 = load volatile half, half addrspace(1)* undef
    %f16.val1 = load volatile half, half addrspace(1)* undef
    %f32.val = load volatile half, half addrspace(1)* undef
    %f16.add0 = fadd half %f16.val0, 0xH3C00
    %f32.add = fadd half %f32.val, 1.000000e+00
    store volatile half %f16.add0, half addrspace(1)* undef
    store volatile half %f32.add, half addrspace(1)* undef
    ret void
  }

  define amdgpu_kernel void @add_f16_1.0_other_high_bits_use_f16_f32() #0 {
    %f16.val0 = load volatile half, half addrspace(1)* undef
    %f16.val1 = load volatile half, half addrspace(1)* undef
    %f32.val = load volatile half, half addrspace(1)* undef
    %f16.add0 = fadd half %f16.val0, 0xH3C00
    %f32.add = fadd half %f32.val, 1.000000e+00
    store volatile half %f16.add0, half addrspace(1)* undef
    store volatile half %f32.add, half addrspace(1)* undef
    ret void
  }

  attributes #0 = { nounwind }

...
---

# f32 1.0 with a single use should be folded as the low 32-bits of a
# literal constant.
# The expected output has the f16 add in its e32 form with the constant as
# its first source operand.

# CHECK-LABEL: name: add_f32_1.0_one_f16_use
# CHECK: %13:vgpr_32 = V_ADD_F16_e32 1065353216, killed %11, implicit $exec

name: add_f32_1.0_one_f16_use
alignment: 0
exposesReturnsTwice: false
legalized: false
regBankSelected: false
selected: false
tracksRegLiveness: true
registers:
  - { id: 0, class: sreg_64 }
  - { id: 1, class: sreg_32 }
  - { id: 2, class: sgpr_32 }
  - { id: 3, class: vgpr_32 }
  - { id: 4, class: sreg_64 }
  - { id: 5, class: sreg_32 }
  - { id: 6, class: sreg_64 }
  - { id: 7, class: sreg_32 }
  - { id: 8, class: sreg_32 }
  - { id: 9, class: sreg_32 }
  - { id: 10, class: sreg_128 }
  - { id: 11, class: vgpr_32 }
  - { id: 12, class: vgpr_32 }
  - { id: 13, class: vgpr_32 }
frameInfo:
  isFrameAddressTaken: false
  isReturnAddressTaken: false
  hasStackMap: false
  hasPatchPoint: false
  stackSize: 0
  offsetAdjustment: 0
  maxAlignment: 0
  adjustsStack: false
  hasCalls: false
  maxCallFrameSize: 0
  hasOpaqueSPAdjustment: false
  hasVAStart: false
  hasMustTailInVarArgFunc: false
body: |
  bb.0 (%ir-block.0):
    %4 = IMPLICIT_DEF
    %5 = COPY %4.sub1
    %6 = IMPLICIT_DEF
    %7 = COPY %6.sub0
    %8 = S_MOV_B32 61440
    %9 = S_MOV_B32 -1
    %10 = REG_SEQUENCE killed %7, 1, killed %5, 2, killed %9, 3, killed %8, 4
    %11 = BUFFER_LOAD_USHORT_OFFSET %10, 0, 0, 0, 0, 0, implicit $exec :: (volatile load 2 from `half addrspace(1)* undef`)
    %12 = V_MOV_B32_e32 1065353216, implicit $exec
    %13 = V_ADD_F16_e64 0, killed %11, 0, %12, 0, 0, implicit $exec
    BUFFER_STORE_SHORT_OFFSET killed %13, %10, 0, 0, 0, 0, 0, implicit $exec :: (volatile store 2 into `half addrspace(1)* undef`)
    S_ENDPGM

...
---
# Materialized f32 inline immediate should not be folded into the f16
# operands
# The V_MOV_B32 has two f16 users here, so both adds are expected to keep
# the register operand.

# CHECK-LABEL: name: add_f32_1.0_multi_f16_use
# CHECK: %13:vgpr_32 = V_MOV_B32_e32 1065353216, implicit $exec
# CHECK: %14:vgpr_32 = V_ADD_F16_e32 killed %11, %13, implicit $exec
# CHECK: %15:vgpr_32 = V_ADD_F16_e32 killed %12, killed %13, implicit $exec


name: add_f32_1.0_multi_f16_use
alignment: 0
exposesReturnsTwice: false
legalized: false
regBankSelected: false
selected: false
tracksRegLiveness: true
registers:
  - { id: 0, class: sreg_64 }
  - { id: 1, class: sreg_32 }
  - { id: 2, class: sgpr_32 }
  - { id: 3, class: vgpr_32 }
  - { id: 4, class: sreg_64 }
  - { id: 5, class: sreg_32 }
  - { id: 6, class: sreg_64 }
  - { id: 7, class: sreg_32 }
  - { id: 8, class: sreg_32 }
  - { id: 9, class: sreg_32 }
  - { id: 10, class: sreg_128 }
  - { id: 11, class: vgpr_32 }
  - { id: 12, class: vgpr_32 }
  - { id: 13, class: vgpr_32 }
  - { id: 14, class: vgpr_32 }
  - { id: 15, class: vgpr_32 }
frameInfo:
  isFrameAddressTaken: false
  isReturnAddressTaken: false
  hasStackMap: false
  hasPatchPoint: false
  stackSize: 0
  offsetAdjustment: 0
  maxAlignment: 0
  adjustsStack: false
  hasCalls: false
  maxCallFrameSize: 0
  hasOpaqueSPAdjustment: false
  hasVAStart: false
  hasMustTailInVarArgFunc: false
body: |
  bb.0 (%ir-block.0):
    %4 = IMPLICIT_DEF
    %5 = COPY %4.sub1
    %6 = IMPLICIT_DEF
    %7 = COPY %6.sub0
    %8 = S_MOV_B32 61440
    %9 = S_MOV_B32 -1
    %10 = REG_SEQUENCE killed %7, 1, killed %5, 2, killed %9, 3, killed %8, 4
    %11 = BUFFER_LOAD_USHORT_OFFSET %10, 0, 0, 0, 0, 0, implicit $exec :: (volatile load 2 from `half addrspace(1)* undef`)
    %12 = BUFFER_LOAD_DWORD_OFFSET %10, 0, 0, 0, 0, 0, implicit $exec :: (volatile load 4 from `float addrspace(1)* undef`)
    %13 = V_MOV_B32_e32 1065353216, implicit $exec
    %14 = V_ADD_F16_e64 0, killed %11, 0, %13, 0, 0, implicit $exec
    %15 = V_ADD_F16_e64 0, killed %12, 0, killed %13, 0, 0, implicit $exec
    BUFFER_STORE_SHORT_OFFSET killed %14, %10, 0, 0, 0, 0, 0, implicit $exec :: (volatile store 2 into `half addrspace(1)* undef`)
    BUFFER_STORE_SHORT_OFFSET killed %15, %10, 0, 0, 0, 0, 0, implicit $exec :: (volatile store 2 into `half addrspace(1)* undef`)
    S_ENDPGM

...
---

# f32 1.0 should be folded into the single f32 use as an inline
# immediate, and folded into the single f16 use as a literal constant

# CHECK-LABEL: name: add_f32_1.0_one_f32_use_one_f16_use
# CHECK: %15:vgpr_32 = V_ADD_F16_e32 1065353216, %11, implicit $exec
# CHECK: %16:vgpr_32 = V_ADD_F32_e32 1065353216, killed %13, implicit $exec

name: add_f32_1.0_one_f32_use_one_f16_use
alignment: 0
exposesReturnsTwice: false
legalized: false
regBankSelected: false
selected: false
tracksRegLiveness: true
registers:
  - { id: 0, class: sreg_64 }
  - { id: 1, class: sreg_32 }
  - { id: 2, class: sgpr_32 }
  - { id: 3, class: vgpr_32 }
  - { id: 4, class: sreg_64 }
  - { id: 5, class: sreg_32 }
  - { id: 6, class: sreg_64 }
  - { id: 7, class: sreg_32 }
  - { id: 8, class: sreg_32 }
  - { id: 9, class: sreg_32 }
  - { id: 10, class: sreg_128 }
  - { id: 11, class: vgpr_32 }
  - { id: 12, class: vgpr_32 }
  - { id: 13, class: vgpr_32 }
  - { id: 14, class: vgpr_32 }
  - { id: 15, class: vgpr_32 }
  - { id: 16, class: vgpr_32 }
frameInfo:
  isFrameAddressTaken: false
  isReturnAddressTaken: false
  hasStackMap: false
  hasPatchPoint: false
  stackSize: 0
  offsetAdjustment: 0
  maxAlignment: 0
  adjustsStack: false
  hasCalls: false
  maxCallFrameSize: 0
  hasOpaqueSPAdjustment: false
  hasVAStart: false
  hasMustTailInVarArgFunc: false
body: |
  bb.0 (%ir-block.0):
    %4 = IMPLICIT_DEF
    %5 = COPY %4.sub1
    %6 = IMPLICIT_DEF
    %7 = COPY %6.sub0
    %8 = S_MOV_B32 61440
    %9 = S_MOV_B32 -1
    %10 = REG_SEQUENCE killed %7, 1, killed %5, 2, killed %9, 3, killed %8, 4
    %11 = BUFFER_LOAD_USHORT_OFFSET %10, 0, 0, 0, 0, 0, implicit $exec :: (volatile load 2 from `half addrspace(1)* undef`)
    %12 = BUFFER_LOAD_USHORT_OFFSET %10, 0, 0, 0, 0, 0, implicit $exec :: (volatile load 2 from `half addrspace(1)* undef`)
    %13 = BUFFER_LOAD_DWORD_OFFSET %10, 0, 0, 0, 0, 0, implicit $exec :: (volatile load 4 from `float addrspace(1)* undef`)
    %14 = V_MOV_B32_e32 1065353216, implicit $exec
    %15 = V_ADD_F16_e64 0, %11, 0, %14, 0, 0, implicit $exec
    %16 = V_ADD_F32_e64 0, killed %13, 0, killed %14, 0, 0, implicit $exec
    BUFFER_STORE_SHORT_OFFSET killed %15, %10, 0, 0, 0, 0, 0, implicit $exec :: (volatile store 2 into `half addrspace(1)* undef`)
    BUFFER_STORE_DWORD_OFFSET killed %16, %10, 0, 0, 0, 0, 0, implicit $exec :: (volatile store 4 into `float addrspace(1)* undef`)
    S_ENDPGM

...
---

# f32 1.0 should be folded for the single f32 use as an inline
# constant, and not folded as a multi-use literal for the f16 cases
# (1065353216 = 0x3F800000, the f32 encoding of 1.0; the V_MOV_B32 is
# expected to stay because the two f16 adds still read it).

# CHECK-LABEL: name: add_f32_1.0_one_f32_use_multi_f16_use
# CHECK: %14:vgpr_32 = V_MOV_B32_e32 1065353216, implicit $exec
# CHECK: %15:vgpr_32 = V_ADD_F16_e32 %11, %14, implicit $exec
# CHECK: %16:vgpr_32 = V_ADD_F16_e32 %12, %14, implicit $exec
# CHECK: %17:vgpr_32 = V_ADD_F32_e32 1065353216, killed %13, implicit $exec

name: add_f32_1.0_one_f32_use_multi_f16_use
alignment: 0
exposesReturnsTwice: false
legalized: false
regBankSelected: false
selected: false
tracksRegLiveness: true
registers:
  - { id: 0, class: sreg_64 }
  - { id: 1, class: sreg_32 }
  - { id: 2, class: sgpr_32 }
  - { id: 3, class: vgpr_32 }
  - { id: 4, class: sreg_64 }
  - { id: 5, class: sreg_32 }
  - { id: 6, class: sreg_64 }
  - { id: 7, class: sreg_32 }
  - { id: 8, class: sreg_32 }
  - { id: 9, class: sreg_32 }
  - { id: 10, class: sreg_128 }
  - { id: 11, class: vgpr_32 }
  - { id: 12, class: vgpr_32 }
  - { id: 13, class: vgpr_32 }
  - { id: 14, class: vgpr_32 }
  - { id: 15, class: vgpr_32 }
  - { id: 16, class: vgpr_32 }
  - { id: 17, class: vgpr_32 }
frameInfo:
  isFrameAddressTaken: false
  isReturnAddressTaken: false
  hasStackMap: false
  hasPatchPoint: false
  stackSize: 0
  offsetAdjustment: 0
  maxAlignment: 0
  adjustsStack: false
  hasCalls: false
  maxCallFrameSize: 0
  hasOpaqueSPAdjustment: false
  hasVAStart: false
  hasMustTailInVarArgFunc: false
body: |
  bb.0 (%ir-block.0):
    %4 = IMPLICIT_DEF
    %5 = COPY %4.sub1
    %6 = IMPLICIT_DEF
    %7 = COPY %6.sub0
    %8 = S_MOV_B32 61440
    %9 = S_MOV_B32 -1
    %10 = REG_SEQUENCE killed %7, 1, killed %5, 2, killed %9, 3, killed %8, 4
    %11 = BUFFER_LOAD_USHORT_OFFSET %10, 0, 0, 0, 0, 0, implicit $exec :: (volatile load 2 from `half addrspace(1)* undef`)
    %12 = BUFFER_LOAD_USHORT_OFFSET %10, 0, 0, 0, 0, 0, implicit $exec :: (volatile load 2 from `half addrspace(1)* undef`)
    %13 = BUFFER_LOAD_DWORD_OFFSET %10, 0, 0, 0, 0, 0, implicit $exec :: (volatile load 4 from `float addrspace(1)* undef`)
    %14 = V_MOV_B32_e32 1065353216, implicit $exec
    %15 = V_ADD_F16_e64 0, %11, 0, %14, 0, 0, implicit $exec
    %16 = V_ADD_F16_e64 0, %12, 0, %14, 0, 0, implicit $exec
    %17 = V_ADD_F32_e64 0, killed %13, 0, killed %14, 0, 0, implicit $exec
    BUFFER_STORE_SHORT_OFFSET killed %15, %10, 0, 0, 0, 0, 0, implicit $exec :: (volatile store 2 into `half addrspace(1)* undef`)
    BUFFER_STORE_SHORT_OFFSET killed %16, %10, 0, 0, 0, 0, 0, implicit $exec :: (volatile store 2 into `half addrspace(1)* undef`)
    BUFFER_STORE_DWORD_OFFSET killed %17, %10, 0, 0, 0, 0, 0, implicit $exec :: (volatile store 4 into `float addrspace(1)* undef`)
    S_ENDPGM

...
---
# The integer constant 1 is expected to be folded into both f16 adds even
# though it has multiple uses (presumably because small integers are inline
# immediates for any operand size - confirm against the target's inline
# constant rules). The V_MOV_B32 that materialized it is still expected in
# the output of these two passes.

# CHECK-LABEL: name: add_i32_1_multi_f16_use
# CHECK: %13:vgpr_32 = V_MOV_B32_e32 1, implicit $exec
# CHECK: %14:vgpr_32 = V_ADD_F16_e32 1, killed %11, implicit $exec
# CHECK: %15:vgpr_32 = V_ADD_F16_e32 1, killed %12, implicit $exec


name: add_i32_1_multi_f16_use
alignment: 0
exposesReturnsTwice: false
legalized: false
regBankSelected: false
selected: false
tracksRegLiveness: true
registers:
  - { id: 0, class: sreg_64 }
  - { id: 1, class: sreg_32 }
  - { id: 2, class: sgpr_32 }
  - { id: 3, class: vgpr_32 }
  - { id: 4, class: sreg_64 }
  - { id: 5, class: sreg_32 }
  - { id: 6, class: sreg_64 }
  - { id: 7, class: sreg_32 }
  - { id: 8, class: sreg_32 }
  - { id: 9, class: sreg_32 }
  - { id: 10, class: sreg_128 }
  - { id: 11, class: vgpr_32 }
  - { id: 12, class: vgpr_32 }
  - { id: 13, class: vgpr_32 }
  - { id: 14, class: vgpr_32 }
  - { id: 15, class: vgpr_32 }
frameInfo:
  isFrameAddressTaken: false
  isReturnAddressTaken: false
  hasStackMap: false
  hasPatchPoint: false
  stackSize: 0
  offsetAdjustment: 0
  maxAlignment: 0
  adjustsStack: false
  hasCalls: false
  maxCallFrameSize: 0
  hasOpaqueSPAdjustment: false
  hasVAStart: false
  hasMustTailInVarArgFunc: false
body: |
  bb.0 (%ir-block.0):
    %4 = IMPLICIT_DEF
    %5 = COPY %4.sub1
    %6 = IMPLICIT_DEF
    %7 = COPY %6.sub0
    %8 = S_MOV_B32 61440
    %9 = S_MOV_B32 -1
    %10 = REG_SEQUENCE killed %7, 1, killed %5, 2, killed %9, 3, killed %8, 4
    %11 = BUFFER_LOAD_USHORT_OFFSET %10, 0, 0, 0, 0, 0, implicit $exec :: (volatile load 2 from `half addrspace(1)* undef`)
    %12 = BUFFER_LOAD_DWORD_OFFSET %10, 0, 0, 0, 0, 0, implicit $exec :: (volatile load 4 from `float addrspace(1)* undef`)
    %13 = V_MOV_B32_e32 1, implicit $exec
    %14 = V_ADD_F16_e64 0, killed %11, 0, %13, 0, 0, implicit $exec
    %15 = V_ADD_F16_e64 0, killed %12, 0, killed %13, 0, 0, implicit $exec
    BUFFER_STORE_SHORT_OFFSET killed %14, %10, 0, 0, 0, 0, 0, implicit $exec :: (volatile store 2 into `half addrspace(1)* undef`)
    BUFFER_STORE_SHORT_OFFSET killed %15, %10, 0, 0, 0, 0, 0, implicit $exec :: (volatile store 2 into `half addrspace(1)* undef`)
    S_ENDPGM

...
---

# The integer constant -2 is expected to be folded into every user: both f16
# adds and the single f32 add (presumably because small integers are inline
# immediates for any operand size - confirm against the target's inline
# constant rules). The V_MOV_B32 is still expected in the output of these
# two passes.

# CHECK-LABEL: name: add_i32_m2_one_f32_use_multi_f16_use
# CHECK: %14:vgpr_32 = V_MOV_B32_e32 -2, implicit $exec
# CHECK: %15:vgpr_32 = V_ADD_F16_e32 -2, %11, implicit $exec
# CHECK: %16:vgpr_32 = V_ADD_F16_e32 -2, %12, implicit $exec
# CHECK: %17:vgpr_32 = V_ADD_F32_e32 -2, killed %13, implicit $exec

name: add_i32_m2_one_f32_use_multi_f16_use
alignment: 0
exposesReturnsTwice: false
legalized: false
regBankSelected: false
selected: false
tracksRegLiveness: true
registers:
  - { id: 0, class: sreg_64 }
  - { id: 1, class: sreg_32 }
  - { id: 2, class: sgpr_32 }
  - { id: 3, class: vgpr_32 }
  - { id: 4, class: sreg_64 }
  - { id: 5, class: sreg_32 }
  - { id: 6, class: sreg_64 }
  - { id: 7, class: sreg_32 }
  - { id: 8, class: sreg_32 }
  - { id: 9, class: sreg_32 }
  - { id: 10, class: sreg_128 }
  - { id: 11, class: vgpr_32 }
  - { id: 12, class: vgpr_32 }
  - { id: 13, class: vgpr_32 }
  - { id: 14, class: vgpr_32 }
  - { id: 15, class: vgpr_32 }
  - { id: 16, class: vgpr_32 }
  - { id: 17, class: vgpr_32 }
frameInfo:
  isFrameAddressTaken: false
  isReturnAddressTaken: false
  hasStackMap: false
  hasPatchPoint: false
  stackSize: 0
  offsetAdjustment: 0
  maxAlignment: 0
  adjustsStack: false
  hasCalls: false
  maxCallFrameSize: 0
  hasOpaqueSPAdjustment: false
  hasVAStart: false
  hasMustTailInVarArgFunc: false
body: |
  bb.0 (%ir-block.0):
    %4 = IMPLICIT_DEF
    %5 = COPY %4.sub1
    %6 = IMPLICIT_DEF
    %7 = COPY %6.sub0
    %8 = S_MOV_B32 61440
    %9 = S_MOV_B32 -1
    %10 = REG_SEQUENCE killed %7, 1, killed %5, 2, killed %9, 3, killed %8, 4
    %11 = BUFFER_LOAD_USHORT_OFFSET %10, 0, 0, 0, 0, 0, implicit $exec :: (volatile load 2 from `half addrspace(1)* undef`)
    %12 = BUFFER_LOAD_USHORT_OFFSET %10, 0, 0, 0, 0, 0, implicit $exec :: (volatile load 2 from `half addrspace(1)* undef`)
    %13 = BUFFER_LOAD_DWORD_OFFSET %10, 0, 0, 0, 0, 0, implicit $exec :: (volatile load 4 from `float addrspace(1)* undef`)
    %14 = V_MOV_B32_e32 -2, implicit $exec
    %15 = V_ADD_F16_e64 0, %11, 0, %14, 0, 0, implicit $exec
    %16 = V_ADD_F16_e64 0, %12, 0, %14, 0, 0, implicit $exec
    %17 = V_ADD_F32_e64 0, killed %13, 0, killed %14, 0, 0, implicit $exec
    BUFFER_STORE_SHORT_OFFSET killed %15, %10, 0, 0, 0, 0, 0, implicit $exec :: (volatile store 2 into `half addrspace(1)* undef`)
    BUFFER_STORE_SHORT_OFFSET killed %16, %10, 0, 0, 0, 0, 0, implicit $exec :: (volatile store 2 into `half addrspace(1)* undef`)
    BUFFER_STORE_DWORD_OFFSET killed %17, %10, 0, 0, 0, 0, 0, implicit $exec :: (volatile store 4 into `float addrspace(1)* undef`)
    S_ENDPGM

...
---

# The bit pattern of f16 1.0 (15360 = 0x3C00) is only an inline immediate
# for f16 operands. Used by multiple f32 adds it would have to be a
# multi-use literal, so it should not be folded: both f32 adds are expected
# to keep reading the V_MOV_B32 result.

# CHECK-LABEL: name: add_f16_1.0_multi_f32_use
# CHECK: %13:vgpr_32 = V_MOV_B32_e32 15360, implicit $exec
# CHECK: %14:vgpr_32 = V_ADD_F32_e32 %11, %13, implicit $exec
# CHECK: %15:vgpr_32 = V_ADD_F32_e32 %12, %13, implicit $exec

name: add_f16_1.0_multi_f32_use
alignment: 0
exposesReturnsTwice: false
legalized: false
regBankSelected: false
selected: false
tracksRegLiveness: true
registers:
  - { id: 0, class: sreg_64 }
  - { id: 1, class: sreg_32 }
  - { id: 2, class: sgpr_32 }
  - { id: 3, class: vgpr_32 }
  - { id: 4, class: sreg_64 }
  - { id: 5, class: sreg_32 }
  - { id: 6, class: sreg_64 }
  - { id: 7, class: sreg_32 }
  - { id: 8, class: sreg_32 }
  - { id: 9, class: sreg_32 }
  - { id: 10, class: sreg_128 }
  - { id: 11, class: vgpr_32 }
  - { id: 12, class: vgpr_32 }
  - { id: 13, class: vgpr_32 }
  - { id: 14, class: vgpr_32 }
  - { id: 15, class: vgpr_32 }
frameInfo:
  isFrameAddressTaken: false
  isReturnAddressTaken: false
  hasStackMap: false
  hasPatchPoint: false
  stackSize: 0
  offsetAdjustment: 0
  maxAlignment: 0
  adjustsStack: false
  hasCalls: false
  maxCallFrameSize: 0
  hasOpaqueSPAdjustment: false
  hasVAStart: false
  hasMustTailInVarArgFunc: false
body: |
  bb.0 (%ir-block.0):
    %4 = IMPLICIT_DEF
    %5 = COPY %4.sub1
    %6 = IMPLICIT_DEF
    %7 = COPY %6.sub0
    %8 = S_MOV_B32 61440
    %9 = S_MOV_B32 -1
    %10 = REG_SEQUENCE killed %7, 1, killed %5, 2, killed %9, 3, killed %8, 4
    %11 = BUFFER_LOAD_DWORD_OFFSET %10, 0, 0, 0, 0, 0, implicit $exec :: (volatile load 4 from `float addrspace(1)* undef`)
    %12 = BUFFER_LOAD_DWORD_OFFSET %10, 0, 0, 0, 0, 0, implicit $exec :: (volatile load 4 from `float addrspace(1)* undef`)
    %13 = V_MOV_B32_e32 15360, implicit $exec
    %14 = V_ADD_F32_e64 0, %11, 0, %13, 0, 0, implicit $exec
    %15 = V_ADD_F32_e64 0, %12, 0, %13, 0, 0, implicit $exec
    BUFFER_STORE_DWORD_OFFSET killed %14, %10, 0, 0, 0, 0, 0, implicit $exec :: (volatile store 4 into `float addrspace(1)* undef`)
    BUFFER_STORE_DWORD_OFFSET killed %15, %10, 0, 0, 0, 0, 0, implicit $exec :: (volatile store 4 into `float addrspace(1)* undef`)
    S_ENDPGM

...
---

# The low 16-bits are an inline immediate, but the high bits are junk
# (80886784 = 0x04D23C00: low half 0x3C00 is f16 1.0, high half is 0x04D2).
# FIXME: Should be able to fold this

# CHECK-LABEL: name: add_f16_1.0_other_high_bits_multi_f16_use
# CHECK: %13:vgpr_32 = V_MOV_B32_e32 80886784, implicit $exec
# CHECK: %14:vgpr_32 = V_ADD_F16_e32 %11, %13, implicit $exec
# CHECK: %15:vgpr_32 = V_ADD_F16_e32 %12, %13, implicit $exec

name: add_f16_1.0_other_high_bits_multi_f16_use
alignment: 0
exposesReturnsTwice: false
legalized: false
regBankSelected: false
selected: false
tracksRegLiveness: true
registers:
  - { id: 0, class: sreg_64 }
  - { id: 1, class: sreg_32 }
  - { id: 2, class: sgpr_32 }
  - { id: 3, class: vgpr_32 }
  - { id: 4, class: sreg_64 }
  - { id: 5, class: sreg_32 }
  - { id: 6, class: sreg_64 }
  - { id: 7, class: sreg_32 }
  - { id: 8, class: sreg_32 }
  - { id: 9, class: sreg_32 }
  - { id: 10, class: sreg_128 }
  - { id: 11, class: vgpr_32 }
  - { id: 12, class: vgpr_32 }
  - { id: 13, class: vgpr_32 }
  - { id: 14, class: vgpr_32 }
  - { id: 15, class: vgpr_32 }
frameInfo:
  isFrameAddressTaken: false
  isReturnAddressTaken: false
  hasStackMap: false
  hasPatchPoint: false
  stackSize: 0
  offsetAdjustment: 0
  maxAlignment: 0
  adjustsStack: false
  hasCalls: false
  maxCallFrameSize: 0
  hasOpaqueSPAdjustment: false
  hasVAStart: false
  hasMustTailInVarArgFunc: false
body: |
  bb.0 (%ir-block.0):
    %4 = IMPLICIT_DEF
    %5 = COPY %4.sub1
    %6 = IMPLICIT_DEF
    %7 = COPY %6.sub0
    %8 = S_MOV_B32 61440
    %9 = S_MOV_B32 -1
    %10 = REG_SEQUENCE killed %7, 1, killed %5, 2, killed %9, 3, killed %8, 4
    %11 = BUFFER_LOAD_USHORT_OFFSET %10, 0, 0, 0, 0, 0, implicit $exec :: (volatile load 2 from `half addrspace(1)* undef`)
    %12 = BUFFER_LOAD_USHORT_OFFSET %10, 0, 0, 0, 0, 0, implicit $exec :: (volatile load 2 from `half addrspace(1)* undef`)
    %13 = V_MOV_B32_e32 80886784, implicit $exec
    %14 = V_ADD_F16_e64 0, %11, 0, %13, 0, 0, implicit $exec
    %15 = V_ADD_F16_e64 0, %12, 0, %13, 0, 0, implicit $exec
    BUFFER_STORE_SHORT_OFFSET killed %14, %10, 0, 0, 0, 0, 0, implicit $exec :: (volatile store 2 into `half addrspace(1)* undef`)
    BUFFER_STORE_SHORT_OFFSET killed %15, %10, 0, 0, 0, 0, 0, implicit $exec :: (volatile store 2 into `half addrspace(1)* undef`)
    S_ENDPGM

...
---

# Same shape of constant with one f32 user and one f16 user
# (305413120 = 0x12343C00: low half 0x3C00 is f16 1.0, high half is 0x1234).
# Currently neither add is expected to take the constant directly.
# FIXME: Should fold inline immediate into f16 and literal use into
# f32 instruction.

# CHECK-LABEL: name: add_f16_1.0_other_high_bits_use_f16_f32
# CHECK: %13:vgpr_32 = V_MOV_B32_e32 305413120, implicit $exec
# CHECK: %14:vgpr_32 = V_ADD_F32_e32 %11, %13, implicit $exec
# CHECK: %15:vgpr_32 = V_ADD_F16_e32 %12, %13, implicit $exec
name: add_f16_1.0_other_high_bits_use_f16_f32
alignment: 0
exposesReturnsTwice: false
legalized: false
regBankSelected: false
selected: false
tracksRegLiveness: true
registers:
  - { id: 0, class: sreg_64 }
  - { id: 1, class: sreg_32 }
  - { id: 2, class: sgpr_32 }
  - { id: 3, class: vgpr_32 }
  - { id: 4, class: sreg_64 }
  - { id: 5, class: sreg_32 }
  - { id: 6, class: sreg_64 }
  - { id: 7, class: sreg_32 }
  - { id: 8, class: sreg_32 }
  - { id: 9, class: sreg_32 }
  - { id: 10, class: sreg_128 }
  - { id: 11, class: vgpr_32 }
  - { id: 12, class: vgpr_32 }
  - { id: 13, class: vgpr_32 }
  - { id: 14, class: vgpr_32 }
  - { id: 15, class: vgpr_32 }
frameInfo:
  isFrameAddressTaken: false
  isReturnAddressTaken: false
  hasStackMap: false
  hasPatchPoint: false
  stackSize: 0
  offsetAdjustment: 0
  maxAlignment: 0
  adjustsStack: false
  hasCalls: false
  maxCallFrameSize: 0
  hasOpaqueSPAdjustment: false
  hasVAStart: false
  hasMustTailInVarArgFunc: false
body: |
  bb.0 (%ir-block.0):
    %4 = IMPLICIT_DEF
    %5 = COPY %4.sub1
    %6 = IMPLICIT_DEF
    %7 = COPY %6.sub0
    %8 = S_MOV_B32 61440
    %9 = S_MOV_B32 -1
    %10 = REG_SEQUENCE killed %7, 1, killed %5, 2, killed %9, 3, killed %8, 4
    %11 = BUFFER_LOAD_DWORD_OFFSET %10, 0, 0, 0, 0, 0, implicit $exec :: (volatile load 4 from `float addrspace(1)* undef`)
    %12 = BUFFER_LOAD_USHORT_OFFSET %10, 0, 0, 0, 0, 0, implicit $exec :: (volatile load 2 from `half addrspace(1)* undef`)
    %13 = V_MOV_B32_e32 305413120, implicit $exec
    %14 = V_ADD_F32_e64 0, %11, 0, %13, 0, 0, implicit $exec
    %15 = V_ADD_F16_e64 0, %12, 0, %13, 0, 0, implicit $exec
    BUFFER_STORE_DWORD_OFFSET killed %14, %10, 0, 0, 0, 0, 0, implicit $exec :: (volatile store 4 into `float addrspace(1)* undef`)
    BUFFER_STORE_SHORT_OFFSET killed %15, %10, 0, 0, 0, 0, 0, implicit $exec :: (volatile store 2 into `half addrspace(1)* undef`)
    S_ENDPGM

...