Home | History | Annotate | Download | only in AMDGPU
      1 # RUN: llc --mtriple=amdgcn--amdhsa -mcpu=fiji -verify-machineinstrs -run-pass si-fold-operands,si-shrink-instructions %s -o - | FileCheck %s
      2 --- |
      3   define amdgpu_kernel void @add_f32_1.0_one_f16_use() #0 { ; IR counterpart of the same-named MIR function below
      4     %f16.val0 = load volatile half, half addrspace(1)* undef
      5     %f16.val1 = load volatile half, half addrspace(1)* undef ; loaded but never used
      6     %f32.val = load volatile float, float addrspace(1)* undef
      7     %f16.add0 = fadd half %f16.val0, 0xH3C00 ; 0xH3C00 is half 1.0
      8     %f32.add = fadd float %f32.val, 1.000000e+00 ; NOTE(review): the same-named MIR body has no f32 add
      9     store volatile half %f16.add0, half addrspace(1)* undef
     10     store volatile float %f32.add, float addrspace(1)* undef
     11     ret void
     12   }
     13 
     14   define amdgpu_kernel void @add_f32_1.0_multi_f16_use() #0 { ; IR counterpart of the same-named MIR function below
     15     %f16.val0 = load volatile half, half addrspace(1)* undef
     16     %f16.val1 = load volatile half, half addrspace(1)* undef ; loaded but never used
     17     %f32.val = load volatile float, float addrspace(1)* undef
     18     %f16.add0 = fadd half %f16.val0, 0xH3C00 ; 0xH3C00 is half 1.0
     19     %f32.add = fadd float %f32.val, 1.000000e+00 ; NOTE(review): the same-named MIR body has two V_ADD_F16 and no f32 add
     20     store volatile half %f16.add0, half addrspace(1)* undef
     21     store volatile float %f32.add, float addrspace(1)* undef
     22     ret void
     23   }
     24 
     25   define amdgpu_kernel void @add_f32_1.0_one_f32_use_one_f16_use () #0 { ; IR counterpart of the same-named MIR function below
     26     %f16.val0 = load volatile half, half addrspace(1)* undef
     27     %f16.val1 = load volatile half, half addrspace(1)* undef ; loaded but never used
     28     %f32.val = load volatile float, float addrspace(1)* undef
     29     %f16.add0 = fadd half %f16.val0, 0xH3C00 ; the single f16 add; 0xH3C00 is half 1.0
     30     %f32.add = fadd float %f32.val, 1.000000e+00 ; the single f32 add
     31     store volatile half %f16.add0, half addrspace(1)* undef
     32     store volatile float %f32.add, float addrspace(1)* undef
     33     ret void
     34   }
     35 
     36   define amdgpu_kernel void @add_f32_1.0_one_f32_use_multi_f16_use () #0 { ; IR counterpart of the same-named MIR function below
     37     %f16.val0 = load volatile half, half addrspace(1)* undef
     38     %f16.val1 = load volatile half, half addrspace(1)* undef
     39     %f32.val = load volatile float, float addrspace(1)* undef
     40     %f16.add0 = fadd half %f16.val0, 0xH3C00 ; first f16 add; 0xH3C00 is half 1.0
     41     %f16.add1 = fadd half %f16.val1, 0xH3C00 ; second f16 add
     42     %f32.add = fadd float %f32.val, 1.000000e+00 ; the single f32 add
     43     store volatile half %f16.add0, half addrspace(1)* undef
     44     store volatile half %f16.add1, half addrspace(1)* undef
     45     store volatile float %f32.add, float addrspace(1)* undef
     46     ret void
     47   }
     48 
     49   define amdgpu_kernel void @add_i32_1_multi_f16_use() #0 { ; IR counterpart of the same-named MIR function below
     50     %f16.val0 = load volatile half, half addrspace(1)* undef
     51     %f16.val1 = load volatile half, half addrspace(1)* undef
     52     %f16.add0 = fadd half %f16.val0, 0xH0001 ; 0xH0001 is the half whose bit pattern is integer 1
     53     %f16.add1 = fadd half %f16.val1, 0xH0001 ; second use of the same constant
     54     store volatile half %f16.add0, half addrspace(1)* undef
     55     store volatile half %f16.add1,half addrspace(1)* undef
     56     ret void
     57   }
     58 
     59   define amdgpu_kernel void @add_i32_m2_one_f32_use_multi_f16_use () #0 { ; IR counterpart of the same-named MIR function below
     60     %f16.val0 = load volatile half, half addrspace(1)* undef
     61     %f16.val1 = load volatile half, half addrspace(1)* undef
     62     %f32.val = load volatile float, float addrspace(1)* undef
     63     %f16.add0 = fadd half %f16.val0, 0xHFFFE ; 0xHFFFE is the 16-bit pattern of integer -2
     64     %f16.add1 = fadd half %f16.val1, 0xHFFFE ; second f16 use of the same constant
     65     %f32.add = fadd float %f32.val, 0xffffffffc0000000 ; double-format hex for the float whose 32-bit pattern is 0xFFFFFFFE (integer -2)
     66     store volatile half %f16.add0, half addrspace(1)* undef
     67     store volatile half %f16.add1, half addrspace(1)* undef
     68     store volatile float %f32.add, float addrspace(1)* undef
     69     ret void
     70   }
     71 
     72   define amdgpu_kernel void @add_f16_1.0_multi_f32_use() #0 { ; IR counterpart of the same-named MIR function below
     73     %f32.val0 = load volatile float, float addrspace(1)* undef
     74     %f32.val1 = load volatile float, float addrspace(1)* undef
     75     %f32.val = load volatile float, float addrspace(1)* undef ; loaded but never used
     76     %f32.add0 = fadd float %f32.val0, 1.0 ; NOTE(review): float 1.0 here, but the MIR body materializes 15360 (the half 1.0 bit pattern)
     77     %f32.add1 = fadd float %f32.val1, 1.0 ; second f32 use
     78     store volatile float %f32.add0, float addrspace(1)* undef
     79     store volatile float %f32.add1, float addrspace(1)* undef
     80     ret void
     81   }
     82 
     83   define amdgpu_kernel void @add_f16_1.0_other_high_bits_multi_f16_use() #0 { ; IR counterpart of the same-named MIR function below
     84     %f16.val0 = load volatile half, half addrspace(1)* undef
     85     %f16.val1 = load volatile half, half addrspace(1)* undef ; loaded but never used
     86     %f32.val = load volatile half, half addrspace(1)* undef ; despite its name, %f32.val is a half
     87     %f16.add0 = fadd half %f16.val0, 0xH3C00 ; 0xH3C00 is half 1.0
     88     %f32.add = fadd half %f32.val, 1.000000e+00 ; despite its name, this is the second half add
     89     store volatile half %f16.add0, half addrspace(1)* undef
     90     store volatile half %f32.add, half addrspace(1)* undef
     91     ret void
     92   }
     93 
     94   define amdgpu_kernel void @add_f16_1.0_other_high_bits_use_f16_f32() #0 { ; IR counterpart of the same-named MIR function below
     95     %f16.val0 = load volatile half, half addrspace(1)* undef
     96     %f16.val1 = load volatile half, half addrspace(1)* undef ; loaded but never used
     97     %f32.val = load volatile half, half addrspace(1)* undef ; despite its name, %f32.val is a half
     98     %f16.add0 = fadd half %f16.val0, 0xH3C00 ; 0xH3C00 is half 1.0
     99     %f32.add = fadd half %f32.val, 1.000000e+00 ; NOTE(review): a half add here, but the MIR body has one V_ADD_F32 and one V_ADD_F16
    100     store volatile half %f16.add0, half addrspace(1)* undef
    101     store volatile half %f32.add, half addrspace(1)* undef
    102     ret void
    103   }
    104 
    105   attributes #0 = { nounwind } ; attribute group referenced by every kernel defined above
    106 
    107 ...
    108 ---
    109 
    110 # The f32 1.0 bit pattern (1065353216) has a single use, in an f16 add.
    111 # It should be folded into that use as a 32-bit literal constant.
    112 
    113 # CHECK-LABEL: name: add_f32_1.0_one_f16_use
    114 # CHECK: %13:vgpr_32 = V_ADD_F16_e32  1065353216, killed %11, implicit $exec
    115 
    116 name:            add_f32_1.0_one_f16_use
    117 alignment:       0
    118 exposesReturnsTwice: false
    119 legalized:       false
    120 regBankSelected: false
    121 selected:        false
    122 tracksRegLiveness: true
    123 registers:
    124   - { id: 0, class: sreg_64 }
    125   - { id: 1, class: sreg_32 }
    126   - { id: 2, class: sgpr_32 }
    127   - { id: 3, class: vgpr_32 }
    128   - { id: 4, class: sreg_64 }
    129   - { id: 5, class: sreg_32 }
    130   - { id: 6, class: sreg_64 }
    131   - { id: 7, class: sreg_32 }
    132   - { id: 8, class: sreg_32 }
    133   - { id: 9, class: sreg_32 }
    134   - { id: 10, class: sreg_128 }
    135   - { id: 11, class: vgpr_32 }
    136   - { id: 12, class: vgpr_32 }
    137   - { id: 13, class: vgpr_32 }
    138 frameInfo:
    139   isFrameAddressTaken: false
    140   isReturnAddressTaken: false
    141   hasStackMap:     false
    142   hasPatchPoint:   false
    143   stackSize:       0
    144   offsetAdjustment: 0
    145   maxAlignment:    0
    146   adjustsStack:    false
    147   hasCalls:        false
    148   maxCallFrameSize: 0
    149   hasOpaqueSPAdjustment: false
    150   hasVAStart:      false
    151   hasMustTailInVarArgFunc: false
    152 body:             |
    153   bb.0 (%ir-block.0):
    154     %4 = IMPLICIT_DEF
    155     %5 = COPY %4.sub1
    156     %6 = IMPLICIT_DEF
    157     %7 = COPY %6.sub0
    158     %8 = S_MOV_B32 61440
    159     %9 = S_MOV_B32 -1
    160     %10 = REG_SEQUENCE killed %7, 1, killed %5, 2, killed %9, 3, killed %8, 4
    161     %11 = BUFFER_LOAD_USHORT_OFFSET %10, 0, 0, 0, 0, 0, implicit $exec :: (volatile load 2 from `half addrspace(1)* undef`)
    162     %12 = V_MOV_B32_e32 1065353216, implicit $exec
    163     %13 = V_ADD_F16_e64 0, killed %11, 0, %12, 0, 0, implicit $exec
    164     BUFFER_STORE_SHORT_OFFSET killed %13, %10, 0, 0, 0, 0, 0, implicit $exec :: (volatile store 2 into `half addrspace(1)* undef`)
    165     S_ENDPGM
    166 
    167 ...
    168 ---
    169 # The V_MOV materializes an f32 inline immediate (1.0) that has two f16
    170 # uses; it should not be folded into either f16 operand.
    171 
    172 # CHECK-LABEL: name: add_f32_1.0_multi_f16_use
    173 # CHECK: %13:vgpr_32 = V_MOV_B32_e32 1065353216, implicit $exec
    174 # CHECK: %14:vgpr_32 = V_ADD_F16_e32 killed %11, %13, implicit $exec
    175 # CHECK: %15:vgpr_32 = V_ADD_F16_e32 killed %12, killed %13, implicit $exec
    176 
    177 
    178 name:            add_f32_1.0_multi_f16_use
    179 alignment:       0
    180 exposesReturnsTwice: false
    181 legalized:       false
    182 regBankSelected: false
    183 selected:        false
    184 tracksRegLiveness: true
    185 registers:
    186   - { id: 0, class: sreg_64 }
    187   - { id: 1, class: sreg_32 }
    188   - { id: 2, class: sgpr_32 }
    189   - { id: 3, class: vgpr_32 }
    190   - { id: 4, class: sreg_64 }
    191   - { id: 5, class: sreg_32 }
    192   - { id: 6, class: sreg_64 }
    193   - { id: 7, class: sreg_32 }
    194   - { id: 8, class: sreg_32 }
    195   - { id: 9, class: sreg_32 }
    196   - { id: 10, class: sreg_128 }
    197   - { id: 11, class: vgpr_32 }
    198   - { id: 12, class: vgpr_32 }
    199   - { id: 13, class: vgpr_32 }
    200   - { id: 14, class: vgpr_32 }
    201   - { id: 15, class: vgpr_32 }
    202 frameInfo:
    203   isFrameAddressTaken: false
    204   isReturnAddressTaken: false
    205   hasStackMap:     false
    206   hasPatchPoint:   false
    207   stackSize:       0
    208   offsetAdjustment: 0
    209   maxAlignment:    0
    210   adjustsStack:    false
    211   hasCalls:        false
    212   maxCallFrameSize: 0
    213   hasOpaqueSPAdjustment: false
    214   hasVAStart:      false
    215   hasMustTailInVarArgFunc: false
    216 body:             |
    217   bb.0 (%ir-block.0):
    218     %4 = IMPLICIT_DEF
    219     %5 = COPY %4.sub1
    220     %6 = IMPLICIT_DEF
    221     %7 = COPY %6.sub0
    222     %8 = S_MOV_B32 61440
    223     %9 = S_MOV_B32 -1
    224     %10 = REG_SEQUENCE killed %7, 1, killed %5, 2, killed %9, 3, killed %8, 4
    225     %11 = BUFFER_LOAD_USHORT_OFFSET %10, 0, 0, 0, 0, 0, implicit $exec :: (volatile load 2 from `half addrspace(1)* undef`)
    226     %12 = BUFFER_LOAD_DWORD_OFFSET %10, 0, 0, 0, 0, 0, implicit $exec :: (volatile load 4 from `float addrspace(1)* undef`)
    227     %13 = V_MOV_B32_e32 1065353216, implicit $exec
    228     %14 = V_ADD_F16_e64 0, killed %11, 0, %13, 0, 0, implicit $exec
    229     %15 = V_ADD_F16_e64 0, killed %12, 0, killed %13, 0, 0, implicit $exec
    230     BUFFER_STORE_SHORT_OFFSET killed %14, %10, 0, 0, 0, 0, 0, implicit $exec :: (volatile store 2 into `half addrspace(1)* undef`)
    231     BUFFER_STORE_SHORT_OFFSET killed %15, %10, 0, 0, 0, 0, 0, implicit $exec :: (volatile store 2 into `half addrspace(1)* undef`)
    232     S_ENDPGM
    233 
    234 ...
    235 ---
    236 
    237 # f32 1.0 should be folded into the single f32 use as an inline
    238 # immediate, and into the single f16 use as a literal constant.
    239 
    240 # CHECK-LABEL: name: add_f32_1.0_one_f32_use_one_f16_use
    241 # CHECK: %15:vgpr_32 = V_ADD_F16_e32 1065353216, %11, implicit $exec
    242 # CHECK: %16:vgpr_32 = V_ADD_F32_e32 1065353216, killed %13, implicit $exec
    243 
    244 name:            add_f32_1.0_one_f32_use_one_f16_use
    245 alignment:       0
    246 exposesReturnsTwice: false
    247 legalized:       false
    248 regBankSelected: false
    249 selected:        false
    250 tracksRegLiveness: true
    251 registers:
    252   - { id: 0, class: sreg_64 }
    253   - { id: 1, class: sreg_32 }
    254   - { id: 2, class: sgpr_32 }
    255   - { id: 3, class: vgpr_32 }
    256   - { id: 4, class: sreg_64 }
    257   - { id: 5, class: sreg_32 }
    258   - { id: 6, class: sreg_64 }
    259   - { id: 7, class: sreg_32 }
    260   - { id: 8, class: sreg_32 }
    261   - { id: 9, class: sreg_32 }
    262   - { id: 10, class: sreg_128 }
    263   - { id: 11, class: vgpr_32 }
    264   - { id: 12, class: vgpr_32 }
    265   - { id: 13, class: vgpr_32 }
    266   - { id: 14, class: vgpr_32 }
    267   - { id: 15, class: vgpr_32 }
    268   - { id: 16, class: vgpr_32 }
    269 frameInfo:
    270   isFrameAddressTaken: false
    271   isReturnAddressTaken: false
    272   hasStackMap:     false
    273   hasPatchPoint:   false
    274   stackSize:       0
    275   offsetAdjustment: 0
    276   maxAlignment:    0
    277   adjustsStack:    false
    278   hasCalls:        false
    279   maxCallFrameSize: 0
    280   hasOpaqueSPAdjustment: false
    281   hasVAStart:      false
    282   hasMustTailInVarArgFunc: false
    283 body:             |
    284   bb.0 (%ir-block.0):
    285     %4 = IMPLICIT_DEF
    286     %5 = COPY %4.sub1
    287     %6 = IMPLICIT_DEF
    288     %7 = COPY %6.sub0
    289     %8 = S_MOV_B32 61440
    290     %9 = S_MOV_B32 -1
    291     %10 = REG_SEQUENCE killed %7, 1, killed %5, 2, killed %9, 3, killed %8, 4
    292     %11 = BUFFER_LOAD_USHORT_OFFSET %10, 0, 0, 0, 0, 0, implicit $exec :: (volatile load 2 from `half addrspace(1)* undef`)
    293     %12 = BUFFER_LOAD_USHORT_OFFSET %10, 0, 0, 0, 0, 0, implicit $exec :: (volatile load 2 from `half addrspace(1)* undef`)
    294     %13 = BUFFER_LOAD_DWORD_OFFSET %10, 0, 0, 0, 0, 0, implicit $exec :: (volatile load 4 from `float addrspace(1)* undef`)
    295     %14 = V_MOV_B32_e32 1065353216, implicit $exec
    296     %15 = V_ADD_F16_e64 0, %11, 0, %14, 0, 0, implicit $exec
    297     %16 = V_ADD_F32_e64 0, killed %13, 0, killed %14, 0, 0, implicit $exec
    298     BUFFER_STORE_SHORT_OFFSET killed %15, %10, 0, 0, 0, 0, 0, implicit $exec :: (volatile store 2 into `half addrspace(1)* undef`)
    299     BUFFER_STORE_DWORD_OFFSET killed %16, %10, 0, 0, 0, 0, 0, implicit $exec :: (volatile store 4 into `float addrspace(1)* undef`)
    300     S_ENDPGM
    301 
    302 ...
    303 ---
    304 
    305 # f32 1.0 should be folded into the single f32 use as an inline
    306 # constant, but not folded as a multi-use literal into the two f16 uses.
    307 
    308 # CHECK-LABEL: name: add_f32_1.0_one_f32_use_multi_f16_use
    309 # CHECK: %14:vgpr_32 = V_MOV_B32_e32 1065353216, implicit $exec
    310 # CHECK: %15:vgpr_32 = V_ADD_F16_e32  %11, %14, implicit $exec
    311 # CHECK: %16:vgpr_32 = V_ADD_F16_e32 %12,  %14, implicit $exec
    312 # CHECK: %17:vgpr_32 = V_ADD_F32_e32 1065353216, killed %13, implicit $exec
    313 
    314 name:            add_f32_1.0_one_f32_use_multi_f16_use
    315 alignment:       0
    316 exposesReturnsTwice: false
    317 legalized:       false
    318 regBankSelected: false
    319 selected:        false
    320 tracksRegLiveness: true
    321 registers:
    322   - { id: 0, class: sreg_64 }
    323   - { id: 1, class: sreg_32 }
    324   - { id: 2, class: sgpr_32 }
    325   - { id: 3, class: vgpr_32 }
    326   - { id: 4, class: sreg_64 }
    327   - { id: 5, class: sreg_32 }
    328   - { id: 6, class: sreg_64 }
    329   - { id: 7, class: sreg_32 }
    330   - { id: 8, class: sreg_32 }
    331   - { id: 9, class: sreg_32 }
    332   - { id: 10, class: sreg_128 }
    333   - { id: 11, class: vgpr_32 }
    334   - { id: 12, class: vgpr_32 }
    335   - { id: 13, class: vgpr_32 }
    336   - { id: 14, class: vgpr_32 }
    337   - { id: 15, class: vgpr_32 }
    338   - { id: 16, class: vgpr_32 }
    339   - { id: 17, class: vgpr_32 }
    340 frameInfo:
    341   isFrameAddressTaken: false
    342   isReturnAddressTaken: false
    343   hasStackMap:     false
    344   hasPatchPoint:   false
    345   stackSize:       0
    346   offsetAdjustment: 0
    347   maxAlignment:    0
    348   adjustsStack:    false
    349   hasCalls:        false
    350   maxCallFrameSize: 0
    351   hasOpaqueSPAdjustment: false
    352   hasVAStart:      false
    353   hasMustTailInVarArgFunc: false
    354 body:             |
    355   bb.0 (%ir-block.0):
    356     %4 = IMPLICIT_DEF
    357     %5 = COPY %4.sub1
    358     %6 = IMPLICIT_DEF
    359     %7 = COPY %6.sub0
    360     %8 = S_MOV_B32 61440
    361     %9 = S_MOV_B32 -1
    362     %10 = REG_SEQUENCE killed %7, 1, killed %5, 2, killed %9, 3, killed %8, 4
    363     %11 = BUFFER_LOAD_USHORT_OFFSET %10, 0, 0, 0, 0, 0, implicit $exec :: (volatile load 2 from `half addrspace(1)* undef`)
    364     %12 = BUFFER_LOAD_USHORT_OFFSET %10, 0, 0, 0, 0, 0, implicit $exec :: (volatile load 2 from `half addrspace(1)* undef`)
    365     %13 = BUFFER_LOAD_DWORD_OFFSET %10, 0, 0, 0, 0, 0, implicit $exec :: (volatile load 4 from `float addrspace(1)* undef`)
    366     %14 = V_MOV_B32_e32 1065353216, implicit $exec
    367     %15 = V_ADD_F16_e64 0, %11, 0, %14, 0, 0, implicit $exec
    368     %16 = V_ADD_F16_e64 0, %12, 0, %14, 0, 0, implicit $exec
    369     %17 = V_ADD_F32_e64 0, killed %13, 0, killed %14, 0, 0, implicit $exec
    370     BUFFER_STORE_SHORT_OFFSET killed %15, %10, 0, 0, 0, 0, 0, implicit $exec :: (volatile store 2 into `half addrspace(1)* undef`)
    371     BUFFER_STORE_SHORT_OFFSET killed %16, %10, 0, 0, 0, 0, 0, implicit $exec :: (volatile store 2 into `half addrspace(1)* undef`)
    372     BUFFER_STORE_DWORD_OFFSET killed %17, %10, 0, 0, 0, 0, 0, implicit $exec :: (volatile store 4 into `float addrspace(1)* undef`)
    373     S_ENDPGM
    374 
    375 ...
    376 ---
    377 # CHECK-LABEL: name: add_i32_1_multi_f16_use
    378 # CHECK: %13:vgpr_32 = V_MOV_B32_e32 1, implicit $exec
    379 # CHECK: %14:vgpr_32 = V_ADD_F16_e32 1, killed %11, implicit $exec
    380 # CHECK: %15:vgpr_32 = V_ADD_F16_e32 1, killed %12, implicit $exec
    381 # The integer immediate 1 should be folded into both f16 uses; the V_MOV is still expected in the output.
    382 
    383 name:            add_i32_1_multi_f16_use
    384 alignment:       0
    385 exposesReturnsTwice: false
    386 legalized:       false
    387 regBankSelected: false
    388 selected:        false
    389 tracksRegLiveness: true
    390 registers:
    391   - { id: 0, class: sreg_64 }
    392   - { id: 1, class: sreg_32 }
    393   - { id: 2, class: sgpr_32 }
    394   - { id: 3, class: vgpr_32 }
    395   - { id: 4, class: sreg_64 }
    396   - { id: 5, class: sreg_32 }
    397   - { id: 6, class: sreg_64 }
    398   - { id: 7, class: sreg_32 }
    399   - { id: 8, class: sreg_32 }
    400   - { id: 9, class: sreg_32 }
    401   - { id: 10, class: sreg_128 }
    402   - { id: 11, class: vgpr_32 }
    403   - { id: 12, class: vgpr_32 }
    404   - { id: 13, class: vgpr_32 }
    405   - { id: 14, class: vgpr_32 }
    406   - { id: 15, class: vgpr_32 }
    407 frameInfo:
    408   isFrameAddressTaken: false
    409   isReturnAddressTaken: false
    410   hasStackMap:     false
    411   hasPatchPoint:   false
    412   stackSize:       0
    413   offsetAdjustment: 0
    414   maxAlignment:    0
    415   adjustsStack:    false
    416   hasCalls:        false
    417   maxCallFrameSize: 0
    418   hasOpaqueSPAdjustment: false
    419   hasVAStart:      false
    420   hasMustTailInVarArgFunc: false
    421 body:             |
    422   bb.0 (%ir-block.0):
    423     %4 = IMPLICIT_DEF
    424     %5 = COPY %4.sub1
    425     %6 = IMPLICIT_DEF
    426     %7 = COPY %6.sub0
    427     %8 = S_MOV_B32 61440
    428     %9 = S_MOV_B32 -1
    429     %10 = REG_SEQUENCE killed %7, 1, killed %5, 2, killed %9, 3, killed %8, 4
    430     %11 = BUFFER_LOAD_USHORT_OFFSET %10, 0, 0, 0, 0, 0, implicit $exec :: (volatile load 2 from `half addrspace(1)* undef`)
    431     %12 = BUFFER_LOAD_DWORD_OFFSET %10, 0, 0, 0, 0, 0, implicit $exec :: (volatile load 4 from `float addrspace(1)* undef`)
    432     %13 = V_MOV_B32_e32 1, implicit $exec
    433     %14 = V_ADD_F16_e64 0, killed %11, 0, %13, 0, 0, implicit $exec
    434     %15 = V_ADD_F16_e64 0, killed %12, 0, killed %13, 0, 0, implicit $exec
    435     BUFFER_STORE_SHORT_OFFSET killed %14, %10, 0, 0, 0, 0, 0, implicit $exec :: (volatile store 2 into `half addrspace(1)* undef`)
    436     BUFFER_STORE_SHORT_OFFSET killed %15, %10, 0, 0, 0, 0, 0, implicit $exec :: (volatile store 2 into `half addrspace(1)* undef`)
    437     S_ENDPGM
    438 
    439 ...
    440 ---
    441 # The integer immediate -2 should be folded into both f16 uses and into the single f32 use.
    442 # CHECK-LABEL: name: add_i32_m2_one_f32_use_multi_f16_use
    443 # CHECK: %14:vgpr_32 = V_MOV_B32_e32 -2, implicit $exec
    444 # CHECK: %15:vgpr_32 = V_ADD_F16_e32 -2, %11, implicit $exec
    445 # CHECK: %16:vgpr_32 = V_ADD_F16_e32 -2, %12, implicit $exec
    446 # CHECK: %17:vgpr_32 = V_ADD_F32_e32 -2, killed %13, implicit $exec
    447 
    448 name:            add_i32_m2_one_f32_use_multi_f16_use
    449 alignment:       0
    450 exposesReturnsTwice: false
    451 legalized:       false
    452 regBankSelected: false
    453 selected:        false
    454 tracksRegLiveness: true
    455 registers:
    456   - { id: 0, class: sreg_64 }
    457   - { id: 1, class: sreg_32 }
    458   - { id: 2, class: sgpr_32 }
    459   - { id: 3, class: vgpr_32 }
    460   - { id: 4, class: sreg_64 }
    461   - { id: 5, class: sreg_32 }
    462   - { id: 6, class: sreg_64 }
    463   - { id: 7, class: sreg_32 }
    464   - { id: 8, class: sreg_32 }
    465   - { id: 9, class: sreg_32 }
    466   - { id: 10, class: sreg_128 }
    467   - { id: 11, class: vgpr_32 }
    468   - { id: 12, class: vgpr_32 }
    469   - { id: 13, class: vgpr_32 }
    470   - { id: 14, class: vgpr_32 }
    471   - { id: 15, class: vgpr_32 }
    472   - { id: 16, class: vgpr_32 }
    473   - { id: 17, class: vgpr_32 }
    474 frameInfo:
    475   isFrameAddressTaken: false
    476   isReturnAddressTaken: false
    477   hasStackMap:     false
    478   hasPatchPoint:   false
    479   stackSize:       0
    480   offsetAdjustment: 0
    481   maxAlignment:    0
    482   adjustsStack:    false
    483   hasCalls:        false
    484   maxCallFrameSize: 0
    485   hasOpaqueSPAdjustment: false
    486   hasVAStart:      false
    487   hasMustTailInVarArgFunc: false
    488 body:             |
    489   bb.0 (%ir-block.0):
    490     %4 = IMPLICIT_DEF
    491     %5 = COPY %4.sub1
    492     %6 = IMPLICIT_DEF
    493     %7 = COPY %6.sub0
    494     %8 = S_MOV_B32 61440
    495     %9 = S_MOV_B32 -1
    496     %10 = REG_SEQUENCE killed %7, 1, killed %5, 2, killed %9, 3, killed %8, 4
    497     %11 = BUFFER_LOAD_USHORT_OFFSET %10, 0, 0, 0, 0, 0, implicit $exec :: (volatile load 2 from `half addrspace(1)* undef`)
    498     %12 = BUFFER_LOAD_USHORT_OFFSET %10, 0, 0, 0, 0, 0, implicit $exec :: (volatile load 2 from `half addrspace(1)* undef`)
    499     %13 = BUFFER_LOAD_DWORD_OFFSET %10, 0, 0, 0, 0, 0, implicit $exec :: (volatile load 4 from `float addrspace(1)* undef`)
    500     %14 = V_MOV_B32_e32 -2, implicit $exec
    501     %15 = V_ADD_F16_e64 0, %11, 0, %14, 0, 0, implicit $exec
    502     %16 = V_ADD_F16_e64 0, %12, 0, %14, 0, 0, implicit $exec
    503     %17 = V_ADD_F32_e64 0, killed %13, 0, killed %14, 0, 0, implicit $exec
    504     BUFFER_STORE_SHORT_OFFSET killed %15, %10, 0, 0, 0, 0, 0, implicit $exec :: (volatile store 2 into `half addrspace(1)* undef`)
    505     BUFFER_STORE_SHORT_OFFSET killed %16, %10, 0, 0, 0, 0, 0, implicit $exec :: (volatile store 2 into `half addrspace(1)* undef`)
    506     BUFFER_STORE_DWORD_OFFSET killed %17, %10, 0, 0, 0, 0, 0, implicit $exec :: (volatile store 4 into `float addrspace(1)* undef`)
    507     S_ENDPGM
    508 
    509 ...
    510 ---
    511 
    512 # The f16 1.0 bit pattern (15360) has two f32 uses; it should stay
    513 # materialized in the V_MOV and not be folded as a multi-use literal.
    514 
    515 # CHECK-LABEL: name: add_f16_1.0_multi_f32_use
    516 # CHECK: %13:vgpr_32 = V_MOV_B32_e32 15360, implicit $exec
    517 # CHECK: %14:vgpr_32 = V_ADD_F32_e32 %11, %13, implicit $exec
    518 # CHECK: %15:vgpr_32 = V_ADD_F32_e32 %12, %13, implicit $exec
    519 
    520 name:            add_f16_1.0_multi_f32_use
    521 alignment:       0
    522 exposesReturnsTwice: false
    523 legalized:       false
    524 regBankSelected: false
    525 selected:        false
    526 tracksRegLiveness: true
    527 registers:
    528   - { id: 0, class: sreg_64 }
    529   - { id: 1, class: sreg_32 }
    530   - { id: 2, class: sgpr_32 }
    531   - { id: 3, class: vgpr_32 }
    532   - { id: 4, class: sreg_64 }
    533   - { id: 5, class: sreg_32 }
    534   - { id: 6, class: sreg_64 }
    535   - { id: 7, class: sreg_32 }
    536   - { id: 8, class: sreg_32 }
    537   - { id: 9, class: sreg_32 }
    538   - { id: 10, class: sreg_128 }
    539   - { id: 11, class: vgpr_32 }
    540   - { id: 12, class: vgpr_32 }
    541   - { id: 13, class: vgpr_32 }
    542   - { id: 14, class: vgpr_32 }
    543   - { id: 15, class: vgpr_32 }
    544 frameInfo:
    545   isFrameAddressTaken: false
    546   isReturnAddressTaken: false
    547   hasStackMap:     false
    548   hasPatchPoint:   false
    549   stackSize:       0
    550   offsetAdjustment: 0
    551   maxAlignment:    0
    552   adjustsStack:    false
    553   hasCalls:        false
    554   maxCallFrameSize: 0
    555   hasOpaqueSPAdjustment: false
    556   hasVAStart:      false
    557   hasMustTailInVarArgFunc: false
    558 body:             |
    559   bb.0 (%ir-block.0):
    560     %4 = IMPLICIT_DEF
    561     %5 = COPY %4.sub1
    562     %6 = IMPLICIT_DEF
    563     %7 = COPY %6.sub0
    564     %8 = S_MOV_B32 61440
    565     %9 = S_MOV_B32 -1
    566     %10 = REG_SEQUENCE killed %7, 1, killed %5, 2, killed %9, 3, killed %8, 4
    567     %11 = BUFFER_LOAD_DWORD_OFFSET %10, 0, 0, 0, 0, 0, implicit $exec :: (volatile load 4 from `float addrspace(1)* undef`)
    568     %12 = BUFFER_LOAD_DWORD_OFFSET %10, 0, 0, 0, 0, 0, implicit $exec :: (volatile load 4 from `float addrspace(1)* undef`)
    569     %13 = V_MOV_B32_e32 15360, implicit $exec
    570     %14 = V_ADD_F32_e64 0, %11, 0, %13, 0, 0, implicit $exec
    571     %15 = V_ADD_F32_e64 0, %12, 0, %13, 0, 0, implicit $exec
    572     BUFFER_STORE_DWORD_OFFSET killed %14, %10, 0, 0, 0, 0, 0, implicit $exec :: (volatile store 4 into `float addrspace(1)* undef`)
    573     BUFFER_STORE_DWORD_OFFSET killed %15, %10, 0, 0, 0, 0, 0, implicit $exec :: (volatile store 4 into `float addrspace(1)* undef`)
    574     S_ENDPGM
    575 
    576 ...
    577 ---
    578 
    579 # 80886784 is 0x04D23C00: the low 16 bits (0x3C00, f16 1.0) are an inline immediate, but the high 16 bits are junk.
    580 # FIXME: Should be able to fold this into the f16 uses.
    581 
    582 # CHECK-LABEL: name: add_f16_1.0_other_high_bits_multi_f16_use
    583 # CHECK: %13:vgpr_32 = V_MOV_B32_e32 80886784, implicit $exec
    584 # CHECK: %14:vgpr_32 = V_ADD_F16_e32 %11, %13, implicit $exec
    585 # CHECK: %15:vgpr_32 = V_ADD_F16_e32 %12, %13, implicit $exec
    586 
    587 name:            add_f16_1.0_other_high_bits_multi_f16_use
    588 alignment:       0
    589 exposesReturnsTwice: false
    590 legalized:       false
    591 regBankSelected: false
    592 selected:        false
    593 tracksRegLiveness: true
    594 registers:
    595   - { id: 0, class: sreg_64 }
    596   - { id: 1, class: sreg_32 }
    597   - { id: 2, class: sgpr_32 }
    598   - { id: 3, class: vgpr_32 }
    599   - { id: 4, class: sreg_64 }
    600   - { id: 5, class: sreg_32 }
    601   - { id: 6, class: sreg_64 }
    602   - { id: 7, class: sreg_32 }
    603   - { id: 8, class: sreg_32 }
    604   - { id: 9, class: sreg_32 }
    605   - { id: 10, class: sreg_128 }
    606   - { id: 11, class: vgpr_32 }
    607   - { id: 12, class: vgpr_32 }
    608   - { id: 13, class: vgpr_32 }
    609   - { id: 14, class: vgpr_32 }
    610   - { id: 15, class: vgpr_32 }
    611 frameInfo:
    612   isFrameAddressTaken: false
    613   isReturnAddressTaken: false
    614   hasStackMap:     false
    615   hasPatchPoint:   false
    616   stackSize:       0
    617   offsetAdjustment: 0
    618   maxAlignment:    0
    619   adjustsStack:    false
    620   hasCalls:        false
    621   maxCallFrameSize: 0
    622   hasOpaqueSPAdjustment: false
    623   hasVAStart:      false
    624   hasMustTailInVarArgFunc: false
    625 body:             |
    626   bb.0 (%ir-block.0):
    627     %4 = IMPLICIT_DEF
    628     %5 = COPY %4.sub1
    629     %6 = IMPLICIT_DEF
    630     %7 = COPY %6.sub0
    631     %8 = S_MOV_B32 61440
    632     %9 = S_MOV_B32 -1
    633     %10 = REG_SEQUENCE killed %7, 1, killed %5, 2, killed %9, 3, killed %8, 4
    634     %11 = BUFFER_LOAD_USHORT_OFFSET %10, 0, 0, 0, 0, 0, implicit $exec :: (volatile load 2 from `half addrspace(1)* undef`)
    635     %12 = BUFFER_LOAD_USHORT_OFFSET %10, 0, 0, 0, 0, 0, implicit $exec :: (volatile load 2 from `half addrspace(1)* undef`)
    636     %13 = V_MOV_B32_e32 80886784, implicit $exec
    637     %14 = V_ADD_F16_e64 0, %11, 0, %13, 0, 0, implicit $exec
    638     %15 = V_ADD_F16_e64 0, %12, 0, %13, 0, 0, implicit $exec
    639     BUFFER_STORE_SHORT_OFFSET killed %14, %10, 0, 0, 0, 0, 0, implicit $exec :: (volatile store 2 into `half addrspace(1)* undef`)
    640     BUFFER_STORE_SHORT_OFFSET killed %15, %10, 0, 0, 0, 0, 0, implicit $exec :: (volatile store 2 into `half addrspace(1)* undef`)
    641     S_ENDPGM
    642 
    643 ...
    644 ---
    645 
    646 # FIXME: Should fold the inline immediate into the f16 instruction and
    647 # the literal into the f32 instruction.
    648 # 305413120 is 0x12343C00: the low 16 bits (0x3C00) are f16 1.0.
    649 # CHECK-LABEL: name: add_f16_1.0_other_high_bits_use_f16_f32
    650 # CHECK: %13:vgpr_32 = V_MOV_B32_e32 305413120, implicit $exec
    651 # CHECK: %14:vgpr_32 = V_ADD_F32_e32 %11, %13, implicit $exec
    652 # CHECK: %15:vgpr_32 = V_ADD_F16_e32 %12, %13, implicit $exec
    653 name:            add_f16_1.0_other_high_bits_use_f16_f32
    654 alignment:       0
    655 exposesReturnsTwice: false
    656 legalized:       false
    657 regBankSelected: false
    658 selected:        false
    659 tracksRegLiveness: true
    660 registers:
    661   - { id: 0, class: sreg_64 }
    662   - { id: 1, class: sreg_32 }
    663   - { id: 2, class: sgpr_32 }
    664   - { id: 3, class: vgpr_32 }
    665   - { id: 4, class: sreg_64 }
    666   - { id: 5, class: sreg_32 }
    667   - { id: 6, class: sreg_64 }
    668   - { id: 7, class: sreg_32 }
    669   - { id: 8, class: sreg_32 }
    670   - { id: 9, class: sreg_32 }
    671   - { id: 10, class: sreg_128 }
    672   - { id: 11, class: vgpr_32 }
    673   - { id: 12, class: vgpr_32 }
    674   - { id: 13, class: vgpr_32 }
    675   - { id: 14, class: vgpr_32 }
    676   - { id: 15, class: vgpr_32 }
    677 frameInfo:
    678   isFrameAddressTaken: false
    679   isReturnAddressTaken: false
    680   hasStackMap:     false
    681   hasPatchPoint:   false
    682   stackSize:       0
    683   offsetAdjustment: 0
    684   maxAlignment:    0
    685   adjustsStack:    false
    686   hasCalls:        false
    687   maxCallFrameSize: 0
    688   hasOpaqueSPAdjustment: false
    689   hasVAStart:      false
    690   hasMustTailInVarArgFunc: false
    691 body:             |
    692   bb.0 (%ir-block.0):
    693     %4 = IMPLICIT_DEF
    694     %5 = COPY %4.sub1
    695     %6 = IMPLICIT_DEF
    696     %7 = COPY %6.sub0
    697     %8 = S_MOV_B32 61440
    698     %9 = S_MOV_B32 -1
    699     %10 = REG_SEQUENCE killed %7, 1, killed %5, 2, killed %9, 3, killed %8, 4
    700     %11 = BUFFER_LOAD_DWORD_OFFSET %10, 0, 0, 0, 0, 0, implicit $exec :: (volatile load 4 from `float addrspace(1)* undef`)
    701     %12 = BUFFER_LOAD_USHORT_OFFSET %10, 0, 0, 0, 0, 0, implicit $exec :: (volatile load 2 from `half addrspace(1)* undef`)
    702     %13 = V_MOV_B32_e32 305413120, implicit $exec
    703     %14 = V_ADD_F32_e64 0, %11, 0, %13, 0, 0, implicit $exec
    704     %15 = V_ADD_F16_e64 0, %12, 0, %13, 0, 0, implicit $exec
    705     BUFFER_STORE_DWORD_OFFSET killed %14, %10, 0, 0, 0, 0, 0, implicit $exec :: (volatile store 4 into `float addrspace(1)* undef`)
    706     BUFFER_STORE_SHORT_OFFSET killed %15, %10, 0, 0, 0, 0, 0, implicit $exec :: (volatile store 2 into `half addrspace(1)* undef`)
    707     S_ENDPGM
    708 
    709 ...
    710