Home | History | Annotate | Download | only in AMDGPU
      1 # RUN: llc -march=amdgcn -verify-machineinstrs -run-pass si-fold-operands  %s -o - | FileCheck -check-prefix=GCN %s
      2 ---
      3 # GCN-LABEL: name: v_max_self_clamp_not_set_f32
      4 # GCN: %20:vgpr_32 = V_ADD_F32_e64 0, killed %17, 0, 1065353216, 0, 0, implicit $exec
      5 # GCN-NEXT: %21:vgpr_32 = V_MAX_F32_e64 0, killed %20, 0, killed %20, 0, 0, implicit $exec
      6 
      7 name:            v_max_self_clamp_not_set_f32
      8 tracksRegLiveness: true
      9 registers:
     10   - { id: 0, class: sgpr_64 }
     11   - { id: 1, class: sreg_32_xm0 }
     12   - { id: 2, class: sgpr_32 }
     13   - { id: 3, class: vgpr_32 }
     14   - { id: 4, class: sreg_64_xexec }
     15   - { id: 5, class: sreg_64_xexec }
     16   - { id: 6, class: sreg_32 }
     17   - { id: 7, class: sreg_32 }
     18   - { id: 8, class: sreg_32_xm0 }
     19   - { id: 9, class: sreg_64 }
     20   - { id: 10, class: sreg_32_xm0 }
     21   - { id: 11, class: sreg_32_xm0 }
     22   - { id: 12, class: sgpr_64 }
     23   - { id: 13, class: sgpr_128 }
     24   - { id: 14, class: sreg_32_xm0 }
     25   - { id: 15, class: sreg_64 }
     26   - { id: 16, class: sgpr_128 }
     27   - { id: 17, class: vgpr_32 }
     28   - { id: 18, class: vreg_64 }
     29   - { id: 19, class: vgpr_32 }
     30   - { id: 20, class: vgpr_32 }
     31   - { id: 21, class: vgpr_32 }
     32   - { id: 22, class: vgpr_32 }
     33   - { id: 23, class: vreg_64 }
     34   - { id: 24, class: vgpr_32 }
     35   - { id: 25, class: vreg_64 }
     36   - { id: 26, class: vreg_64 }
     37 liveins:
     38   - { reg: '$sgpr0_sgpr1', virtual-reg: '%0' }
     39   - { reg: '$vgpr0', virtual-reg: '%3' }
     40 body:             |
     41   bb.0:
     42     liveins: $sgpr0_sgpr1, $vgpr0
     43 
     44     %3 = COPY $vgpr0
     45     %0 = COPY $sgpr0_sgpr1
     46     %4 = S_LOAD_DWORDX2_IMM %0, 9, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(2)* undef`)
     47     %5 = S_LOAD_DWORDX2_IMM %0, 11, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(2)* undef`)
     48     %24 = V_ASHRREV_I32_e32 31, %3, implicit $exec
     49     %25 = REG_SEQUENCE %3, 1, %24, 2
     50     %10 = S_MOV_B32 61440
     51     %11 = S_MOV_B32 0
     52     %12 = REG_SEQUENCE killed %11, 1, killed %10, 2
     53     %13 = REG_SEQUENCE killed %5, 17, %12, 18
     54     %14 = S_MOV_B32 2
     55     %26 = V_LSHL_B64 killed %25, 2, implicit $exec
     56     %16 = REG_SEQUENCE killed %4, 17, %12, 18
     57     %18 = COPY %26
     58     %17 = BUFFER_LOAD_DWORD_ADDR64 %26, killed %13, 0, 0, 0, 0, 0, implicit $exec
     59     %20 = V_ADD_F32_e64 0, killed %17, 0, 1065353216, 0, 0, implicit $exec
     60     %21 = V_MAX_F32_e64 0, killed %20, 0, killed %20, 0, 0, implicit $exec
     61     BUFFER_STORE_DWORD_ADDR64 killed %21, %26, killed %16, 0, 0, 0, 0, 0, implicit $exec
     62     S_ENDPGM
     63 
     64 ...
     65 ---
     66 # GCN-LABEL: name: v_clamp_omod_already_set_f32
     67 # GCN: %20:vgpr_32 = V_ADD_F32_e64 0, killed %17, 0, 1065353216, 0, 0, implicit $exec
     68 # GCN: %21:vgpr_32 = V_MAX_F32_e64 0, killed %20, 0, killed %20, 1, 3, implicit $exec
     69 name:            v_clamp_omod_already_set_f32
     70 tracksRegLiveness: true
     71 registers:
     72   - { id: 0, class: sgpr_64 }
     73   - { id: 1, class: sreg_32_xm0 }
     74   - { id: 2, class: sgpr_32 }
     75   - { id: 3, class: vgpr_32 }
     76   - { id: 4, class: sreg_64_xexec }
     77   - { id: 5, class: sreg_64_xexec }
     78   - { id: 6, class: sreg_32 }
     79   - { id: 7, class: sreg_32 }
     80   - { id: 8, class: sreg_32_xm0 }
     81   - { id: 9, class: sreg_64 }
     82   - { id: 10, class: sreg_32_xm0 }
     83   - { id: 11, class: sreg_32_xm0 }
     84   - { id: 12, class: sgpr_64 }
     85   - { id: 13, class: sgpr_128 }
     86   - { id: 14, class: sreg_32_xm0 }
     87   - { id: 15, class: sreg_64 }
     88   - { id: 16, class: sgpr_128 }
     89   - { id: 17, class: vgpr_32 }
     90   - { id: 18, class: vreg_64 }
     91   - { id: 19, class: vgpr_32 }
     92   - { id: 20, class: vgpr_32 }
     93   - { id: 21, class: vgpr_32 }
     94   - { id: 22, class: vgpr_32 }
     95   - { id: 23, class: vreg_64 }
     96   - { id: 24, class: vgpr_32 }
     97   - { id: 25, class: vreg_64 }
     98   - { id: 26, class: vreg_64 }
     99 liveins:
    100   - { reg: '$sgpr0_sgpr1', virtual-reg: '%0' }
    101   - { reg: '$vgpr0', virtual-reg: '%3' }
    102 body:             |
    103   bb.0:
    104     liveins: $sgpr0_sgpr1, $vgpr0
    105 
    106     %3 = COPY $vgpr0
    107     %0 = COPY $sgpr0_sgpr1
    108     %4 = S_LOAD_DWORDX2_IMM %0, 9, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(2)* undef`)
    109     %5 = S_LOAD_DWORDX2_IMM %0, 11, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(2)* undef`)
    110     %24 = V_ASHRREV_I32_e32 31, %3, implicit $exec
    111     %25 = REG_SEQUENCE %3, 1, %24, 2
    112     %10 = S_MOV_B32 61440
    113     %11 = S_MOV_B32 0
    114     %12 = REG_SEQUENCE killed %11, 1, killed %10, 2
    115     %13 = REG_SEQUENCE killed %5, 17, %12, 18
    116     %14 = S_MOV_B32 2
    117     %26 = V_LSHL_B64 killed %25, 2, implicit $exec
    118     %16 = REG_SEQUENCE killed %4, 17, %12, 18
    119     %18 = COPY %26
    120     %17 = BUFFER_LOAD_DWORD_ADDR64 %26, killed %13, 0, 0, 0, 0, 0, implicit $exec
    121     %20 = V_ADD_F32_e64 0, killed %17, 0, 1065353216, 0, 0, implicit $exec
    122     %21 = V_MAX_F32_e64 0, killed %20, 0, killed %20, 1, 3, implicit $exec
    123     BUFFER_STORE_DWORD_ADDR64 killed %21, %26, killed %16, 0, 0, 0, 0, 0, implicit $exec
    124     S_ENDPGM
    125 ...
    126 ---
    127 # Don't fold a mul that looks like an omod if the mul itself already has omod set
    128 
    129 # GCN-LABEL: name: v_omod_mul_omod_already_set_f32
    130 # GCN: %20:vgpr_32 = V_ADD_F32_e64 0, killed %17, 0, 1065353216, 0, 0, implicit $exec
    131 # GCN-NEXT: %21:vgpr_32 = V_MUL_F32_e64 0, killed %20, 0, 1056964608, 0, 3, implicit $exec
    132 name:            v_omod_mul_omod_already_set_f32
    133 tracksRegLiveness: true
    134 registers:
    135   - { id: 0, class: sgpr_64 }
    136   - { id: 1, class: sreg_32_xm0 }
    137   - { id: 2, class: sgpr_32 }
    138   - { id: 3, class: vgpr_32 }
    139   - { id: 4, class: sreg_64_xexec }
    140   - { id: 5, class: sreg_64_xexec }
    141   - { id: 6, class: sreg_32 }
    142   - { id: 7, class: sreg_32 }
    143   - { id: 8, class: sreg_32_xm0 }
    144   - { id: 9, class: sreg_64 }
    145   - { id: 10, class: sreg_32_xm0 }
    146   - { id: 11, class: sreg_32_xm0 }
    147   - { id: 12, class: sgpr_64 }
    148   - { id: 13, class: sgpr_128 }
    149   - { id: 14, class: sreg_32_xm0 }
    150   - { id: 15, class: sreg_64 }
    151   - { id: 16, class: sgpr_128 }
    152   - { id: 17, class: vgpr_32 }
    153   - { id: 18, class: vreg_64 }
    154   - { id: 19, class: vgpr_32 }
    155   - { id: 20, class: vgpr_32 }
    156   - { id: 21, class: vgpr_32 }
    157   - { id: 22, class: vgpr_32 }
    158   - { id: 23, class: vreg_64 }
    159   - { id: 24, class: vgpr_32 }
    160   - { id: 25, class: vreg_64 }
    161   - { id: 26, class: vreg_64 }
    162 liveins:
    163   - { reg: '$sgpr0_sgpr1', virtual-reg: '%0' }
    164   - { reg: '$vgpr0', virtual-reg: '%3' }
    165 body:             |
    166   bb.0:
    167     liveins: $sgpr0_sgpr1, $vgpr0
    168 
    169     %3 = COPY $vgpr0
    170     %0 = COPY $sgpr0_sgpr1
    171     %4 = S_LOAD_DWORDX2_IMM %0, 9, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(2)* undef`)
    172     %5 = S_LOAD_DWORDX2_IMM %0, 11, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(2)* undef`)
    173     %24 = V_ASHRREV_I32_e32 31, %3, implicit $exec
    174     %25 = REG_SEQUENCE %3, 1, %24, 2
    175     %10 = S_MOV_B32 61440
    176     %11 = S_MOV_B32 0
    177     %12 = REG_SEQUENCE killed %11, 1, killed %10, 2
    178     %13 = REG_SEQUENCE killed %5, 17, %12, 18
    179     %14 = S_MOV_B32 2
    180     %26 = V_LSHL_B64 killed %25, 2, implicit $exec
    181     %16 = REG_SEQUENCE killed %4, 17, %12, 18
    182     %18 = COPY %26
    183     %17 = BUFFER_LOAD_DWORD_ADDR64 %26, killed %13, 0, 0, 0, 0, 0, implicit $exec
    184     %20 = V_ADD_F32_e64 0, killed %17, 0, 1065353216, 0, 0, implicit $exec
    185     %21 = V_MUL_F32_e64 0, killed %20, 0, 1056964608, 0, 3, implicit $exec
    186     BUFFER_STORE_DWORD_ADDR64 killed %21, %26, killed %16, 0, 0, 0, 0, 0, implicit $exec
    187     S_ENDPGM
    188 
    189 ...
    190 ---
    191 # Don't fold a mul that looks like an omod if the mul itself already has clamp set.
    192 # This might be OK, but would require folding the clamp at the same time.
    193 # GCN-LABEL: name: v_omod_mul_clamp_already_set_f32
    194 # GCN: %20:vgpr_32 = V_ADD_F32_e64 0, killed %17, 0, 1065353216, 0, 0, implicit $exec
    195 # GCN-NEXT: %21:vgpr_32 = V_MUL_F32_e64 0, killed %20, 0, 1056964608, 1, 0, implicit $exec
    196 
    197 name:            v_omod_mul_clamp_already_set_f32
    198 tracksRegLiveness: true
    199 registers:
    200   - { id: 0, class: sgpr_64 }
    201   - { id: 1, class: sreg_32_xm0 }
    202   - { id: 2, class: sgpr_32 }
    203   - { id: 3, class: vgpr_32 }
    204   - { id: 4, class: sreg_64_xexec }
    205   - { id: 5, class: sreg_64_xexec }
    206   - { id: 6, class: sreg_32 }
    207   - { id: 7, class: sreg_32 }
    208   - { id: 8, class: sreg_32_xm0 }
    209   - { id: 9, class: sreg_64 }
    210   - { id: 10, class: sreg_32_xm0 }
    211   - { id: 11, class: sreg_32_xm0 }
    212   - { id: 12, class: sgpr_64 }
    213   - { id: 13, class: sgpr_128 }
    214   - { id: 14, class: sreg_32_xm0 }
    215   - { id: 15, class: sreg_64 }
    216   - { id: 16, class: sgpr_128 }
    217   - { id: 17, class: vgpr_32 }
    218   - { id: 18, class: vreg_64 }
    219   - { id: 19, class: vgpr_32 }
    220   - { id: 20, class: vgpr_32 }
    221   - { id: 21, class: vgpr_32 }
    222   - { id: 22, class: vgpr_32 }
    223   - { id: 23, class: vreg_64 }
    224   - { id: 24, class: vgpr_32 }
    225   - { id: 25, class: vreg_64 }
    226   - { id: 26, class: vreg_64 }
    227 liveins:
    228   - { reg: '$sgpr0_sgpr1', virtual-reg: '%0' }
    229   - { reg: '$vgpr0', virtual-reg: '%3' }
    230 body:             |
    231   bb.0:
    232     liveins: $sgpr0_sgpr1, $vgpr0
    233 
    234     %3 = COPY $vgpr0
    235     %0 = COPY $sgpr0_sgpr1
    236     %4 = S_LOAD_DWORDX2_IMM %0, 9, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(2)* undef`)
    237     %5 = S_LOAD_DWORDX2_IMM %0, 11, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(2)* undef`)
    238     %24 = V_ASHRREV_I32_e32 31, %3, implicit $exec
    239     %25 = REG_SEQUENCE %3, 1, %24, 2
    240     %10 = S_MOV_B32 61440
    241     %11 = S_MOV_B32 0
    242     %12 = REG_SEQUENCE killed %11, 1, killed %10, 2
    243     %13 = REG_SEQUENCE killed %5, 17, %12, 18
    244     %14 = S_MOV_B32 2
    245     %26 = V_LSHL_B64 killed %25, 2, implicit $exec
    246     %16 = REG_SEQUENCE killed %4, 17, %12, 18
    247     %18 = COPY %26
    248     %17 = BUFFER_LOAD_DWORD_ADDR64 %26, killed %13, 0, 0, 0, 0, 0, implicit $exec
    249     %20 = V_ADD_F32_e64 0, killed %17, 0, 1065353216, 0, 0, implicit $exec
    250     %21 = V_MUL_F32_e64 0, killed %20, 0, 1056964608, 1, 0, implicit $exec
    251     BUFFER_STORE_DWORD_ADDR64 killed %21, %26, killed %16, 0, 0, 0, 0, 0, implicit $exec
    252     S_ENDPGM
    253 
    254 ...
    255 
    256 
    257 
    258 
    259 
    260 
    261 
    262 
    263 
    264 
    265 
    266 
    267 
    268 ---
    269 # Don't fold an add (x + x) that looks like an omod if the add itself already has omod set
    270 
    271 # GCN-LABEL: name: v_omod_add_omod_already_set_f32
    272 # GCN: %20:vgpr_32 = V_ADD_F32_e64 0, killed %17, 0, 1065353216, 0, 0, implicit $exec
    273 # GCN-NEXT: %21:vgpr_32 = V_ADD_F32_e64 0, killed %20, 0, killed %20, 0, 3, implicit $exec
    274 name:            v_omod_add_omod_already_set_f32
    275 tracksRegLiveness: true
    276 registers:
    277   - { id: 0, class: sgpr_64 }
    278   - { id: 1, class: sreg_32_xm0 }
    279   - { id: 2, class: sgpr_32 }
    280   - { id: 3, class: vgpr_32 }
    281   - { id: 4, class: sreg_64_xexec }
    282   - { id: 5, class: sreg_64_xexec }
    283   - { id: 6, class: sreg_32 }
    284   - { id: 7, class: sreg_32 }
    285   - { id: 8, class: sreg_32_xm0 }
    286   - { id: 9, class: sreg_64 }
    287   - { id: 10, class: sreg_32_xm0 }
    288   - { id: 11, class: sreg_32_xm0 }
    289   - { id: 12, class: sgpr_64 }
    290   - { id: 13, class: sgpr_128 }
    291   - { id: 14, class: sreg_32_xm0 }
    292   - { id: 15, class: sreg_64 }
    293   - { id: 16, class: sgpr_128 }
    294   - { id: 17, class: vgpr_32 }
    295   - { id: 18, class: vreg_64 }
    296   - { id: 19, class: vgpr_32 }
    297   - { id: 20, class: vgpr_32 }
    298   - { id: 21, class: vgpr_32 }
    299   - { id: 22, class: vgpr_32 }
    300   - { id: 23, class: vreg_64 }
    301   - { id: 24, class: vgpr_32 }
    302   - { id: 25, class: vreg_64 }
    303   - { id: 26, class: vreg_64 }
    304 liveins:
    305   - { reg: '$sgpr0_sgpr1', virtual-reg: '%0' }
    306   - { reg: '$vgpr0', virtual-reg: '%3' }
    307 body:             |
    308   bb.0:
    309     liveins: $sgpr0_sgpr1, $vgpr0
    310 
    311     %3 = COPY $vgpr0
    312     %0 = COPY $sgpr0_sgpr1
    313     %4 = S_LOAD_DWORDX2_IMM %0, 9, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(2)* undef`)
    314     %5 = S_LOAD_DWORDX2_IMM %0, 11, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(2)* undef`)
    315     %24 = V_ASHRREV_I32_e32 31, %3, implicit $exec
    316     %25 = REG_SEQUENCE %3, 1, %24, 2
    317     %10 = S_MOV_B32 61440
    318     %11 = S_MOV_B32 0
    319     %12 = REG_SEQUENCE killed %11, 1, killed %10, 2
    320     %13 = REG_SEQUENCE killed %5, 17, %12, 18
    321     %14 = S_MOV_B32 2
    322     %26 = V_LSHL_B64 killed %25, 2, implicit $exec
    323     %16 = REG_SEQUENCE killed %4, 17, %12, 18
    324     %18 = COPY %26
    325     %17 = BUFFER_LOAD_DWORD_ADDR64 %26, killed %13, 0, 0, 0, 0, 0, implicit $exec
    326     %20 = V_ADD_F32_e64 0, killed %17, 0, 1065353216, 0, 0, implicit $exec
    327     %21 = V_ADD_F32_e64 0, killed %20, 0, killed %20, 0, 3, implicit $exec
    328     BUFFER_STORE_DWORD_ADDR64 killed %21, %26, killed %16, 0, 0, 0, 0, 0, implicit $exec
    329     S_ENDPGM
    330 
    331 ...
    332 ---
    333 # Don't fold an add (x + x) that looks like an omod if the add itself already has clamp set.
    334 # This might be OK, but would require folding the clamp at the same time.
    335 # GCN-LABEL: name: v_omod_add_clamp_already_set_f32
    336 # GCN: %20:vgpr_32 = V_ADD_F32_e64 0, killed %17, 0, 1065353216, 0, 0, implicit $exec
    337 # GCN-NEXT: %21:vgpr_32 = V_ADD_F32_e64 0, killed %20, 0, killed %20, 1, 0, implicit $exec
    338 
    339 name:            v_omod_add_clamp_already_set_f32
    340 tracksRegLiveness: true
    341 registers:
    342   - { id: 0, class: sgpr_64 }
    343   - { id: 1, class: sreg_32_xm0 }
    344   - { id: 2, class: sgpr_32 }
    345   - { id: 3, class: vgpr_32 }
    346   - { id: 4, class: sreg_64_xexec }
    347   - { id: 5, class: sreg_64_xexec }
    348   - { id: 6, class: sreg_32 }
    349   - { id: 7, class: sreg_32 }
    350   - { id: 8, class: sreg_32_xm0 }
    351   - { id: 9, class: sreg_64 }
    352   - { id: 10, class: sreg_32_xm0 }
    353   - { id: 11, class: sreg_32_xm0 }
    354   - { id: 12, class: sgpr_64 }
    355   - { id: 13, class: sgpr_128 }
    356   - { id: 14, class: sreg_32_xm0 }
    357   - { id: 15, class: sreg_64 }
    358   - { id: 16, class: sgpr_128 }
    359   - { id: 17, class: vgpr_32 }
    360   - { id: 18, class: vreg_64 }
    361   - { id: 19, class: vgpr_32 }
    362   - { id: 20, class: vgpr_32 }
    363   - { id: 21, class: vgpr_32 }
    364   - { id: 22, class: vgpr_32 }
    365   - { id: 23, class: vreg_64 }
    366   - { id: 24, class: vgpr_32 }
    367   - { id: 25, class: vreg_64 }
    368   - { id: 26, class: vreg_64 }
    369 liveins:
    370   - { reg: '$sgpr0_sgpr1', virtual-reg: '%0' }
    371   - { reg: '$vgpr0', virtual-reg: '%3' }
    372 body:             |
    373   bb.0:
    374     liveins: $sgpr0_sgpr1, $vgpr0
    375 
    376     %3 = COPY $vgpr0
    377     %0 = COPY $sgpr0_sgpr1
    378     %4 = S_LOAD_DWORDX2_IMM %0, 9, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(2)* undef`)
    379     %5 = S_LOAD_DWORDX2_IMM %0, 11, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(2)* undef`)
    380     %24 = V_ASHRREV_I32_e32 31, %3, implicit $exec
    381     %25 = REG_SEQUENCE %3, 1, %24, 2
    382     %10 = S_MOV_B32 61440
    383     %11 = S_MOV_B32 0
    384     %12 = REG_SEQUENCE killed %11, 1, killed %10, 2
    385     %13 = REG_SEQUENCE killed %5, 17, %12, 18
    386     %14 = S_MOV_B32 2
    387     %26 = V_LSHL_B64 killed %25, 2, implicit $exec
    388     %16 = REG_SEQUENCE killed %4, 17, %12, 18
    389     %18 = COPY %26
    390     %17 = BUFFER_LOAD_DWORD_ADDR64 %26, killed %13, 0, 0, 0, 0, 0, implicit $exec
    391     %20 = V_ADD_F32_e64 0, killed %17, 0, 1065353216, 0, 0, implicit $exec
    392     %21 = V_ADD_F32_e64 0, killed %20, 0, killed %20, 1, 0, implicit $exec
    393     BUFFER_STORE_DWORD_ADDR64 killed %21, %26, killed %16, 0, 0, 0, 0, 0, implicit $exec
    394     S_ENDPGM
    395 
    396 ...
    397 ---
    398 
    399 # The pass used to crash when the second operand of max was an immediate.
    400 name:            v_max_reg_imm_f32
    401 tracksRegLiveness: true
    402 registers:
    403   - { id: 0, class: vgpr_32 }
    404   - { id: 1, class: vgpr_32 }
    405 body:             |
    406   bb.0:
    407     liveins: $vgpr0
    408 
    409     %0 = COPY $vgpr0
    410     %1 = V_MAX_F32_e64 0, killed %0, 0, 1056964608, 1, 0, implicit $exec
    411 
    412 ...
    413