Home | History | Annotate | Download | only in AMDGPU
      1 # RUN: llc -march=amdgcn -mcpu=fiji -start-before=si-peephole-sdwa -verify-machineinstrs -o - %s | FileCheck -check-prefix=SDWA %s
      2 # RUN: llc -march=amdgcn -mcpu=gfx900 -start-before=si-peephole-sdwa -verify-machineinstrs -o - %s | FileCheck -check-prefix=SDWA %s
      3 
      4 # SDWA-LABEL: {{^}}add_f16_u32_preserve
      5 
        # Expected output for add_f16_u32_preserve: the multiply is emitted first and
        # writes WORD_1 of its result with dst_unused:UNUSED_PAD; the add then writes
        # BYTE_1 with dst_unused:UNUSED_PRESERVE, keeping the other bytes of the
        # destination. The add is therefore expected to target the register produced
        # by the multiply, so its destination reuses the RES binding instead of
        # rebinding it (a rebinding would accept any register). This matches how the
        # other tests in this file check their preserve destination.
      6 # SDWA: flat_load_dword [[FIRST:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}]
      7 # SDWA: flat_load_dword [[SECOND:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}]
      8 
      9 # SDWA: v_mul_f32_sdwa [[RES:v[0-9]+]], [[FIRST]], [[SECOND]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_1 src1_sel:BYTE_3
     10 # SDWA: v_add_f16_sdwa [[RES]], [[FIRST]], [[SECOND]] dst_sel:BYTE_1 dst_unused:UNUSED_PRESERVE src0_sel:WORD_0 src1_sel:WORD_1
     11 
     12 # SDWA: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], [[RES]]
     13 
     14 ---
        # Test input for add_f16_u32_preserve. Two dwords are loaded through %0 and
        # %1, sub-dword pieces are extracted, combined, shifted into place and OR-ed
        # together, and the result is stored back through %0:
        #   %5  = first load and-ed with 65535
        #   %6  = second load shifted right by 16
        #   %7  = V_BFE_U32 of the first load with operands 8, 8
        #         (presumably offset 8, width 8 -- the expected src0_sel:BYTE_1 agrees)
        #   %8  = second load shifted right by 24
        #   %9  = f16 add of %5 and %6;      %10 = %9 shifted left by 8 (16-bit shift)
        #   %11 = f32 multiply of %7 and %8; %12 = %11 shifted left by 16
        #   %13 = %10 OR %12
        # The check lines placed before this document expect the add and the multiply
        # to be printed in their _sdwa forms with dst_sel/src_sel selectors, the add
        # using dst_unused:UNUSED_PRESERVE.
     15 name:            add_f16_u32_preserve
     16 tracksRegLiveness: true
     17 registers:       
     18   - { id: 0, class: vreg_64 }
     19   - { id: 1, class: vreg_64 }
     20   - { id: 2, class: sreg_64 }
     21   - { id: 3, class: vgpr_32 }
     22   - { id: 4, class: vgpr_32 }
     23   - { id: 5, class: vgpr_32 }
     24   - { id: 6, class: vgpr_32 }
     25   - { id: 7, class: vgpr_32 }
     26   - { id: 8, class: vgpr_32 }
     27   - { id: 9, class: vgpr_32 }
     28   - { id: 10, class: vgpr_32 }
     29   - { id: 11, class: vgpr_32 }
     30   - { id: 12, class: vgpr_32 }
     31   - { id: 13, class: vgpr_32 }
     32 body:             |
     33   bb.0:
     34     liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $sgpr30_sgpr31
     35   
     36     %2 = COPY $sgpr30_sgpr31
     37     %1 = COPY $vgpr2_vgpr3
     38     %0 = COPY $vgpr0_vgpr1
     39     %3 = FLAT_LOAD_DWORD %0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 4)
     40     %4 = FLAT_LOAD_DWORD %1, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 4)
     41 
     42     %5 = V_AND_B32_e32 65535, %3, implicit $exec
     43     %6 = V_LSHRREV_B32_e64 16, %4, implicit $exec
     44     %7 = V_BFE_U32 %3, 8, 8, implicit $exec
     45     %8 = V_LSHRREV_B32_e32 24, %4, implicit $exec
     46 
     47     %9 = V_ADD_F16_e64 0, %5, 0, %6, 0, 0, implicit $exec
     48     %10 = V_LSHLREV_B16_e64 8, %9, implicit $exec
     49     %11 = V_MUL_F32_e64 0, %7, 0, %8, 0, 0, implicit $exec
     50     %12 = V_LSHLREV_B32_e64 16, %11, implicit $exec
     51 
     52     %13 = V_OR_B32_e64 %10, %12, implicit $exec
     53 
     54     FLAT_STORE_DWORD %0, %13, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4)
     55     $sgpr30_sgpr31 = COPY %2
     56     S_SETPC_B64_return $sgpr30_sgpr31
     57 
     58 ---
        # sdwa_preserve_keep: the input already contains a V_MOV_B32_sdwa whose result
        # %17 is tied to %11, where %11 is the first load and-ed with 255 (%10).
        # The expected output prints the mov with dst_sel:WORD_1
        # dst_unused:UNUSED_PRESERVE src0_sel:WORD_0 (presumably the immediates
        # 5, 2, 4 of the input instruction), still contains the v_and_b32 with 0xff,
        # and has the mov writing into the register holding the and result, which is
        # then stored.
        # NOTE(review): the and is presumably kept because a 0xff mask changes part
        # of the low word that the mov preserves -- confirm against the pass.
        # NOTE(review): %9 is defined but not used by any other instruction in this
        # body, and register ids 5-8 are declared but never referenced.
     59 # SDWA-LABEL: sdwa_preserve_keep
     60 # SDWA: flat_load_dword [[FIRST:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}]
     61 # SDWA: flat_load_dword [[SECOND:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}]
     62 
     63 # SDWA: v_and_b32_e32 [[AND:v[0-9]+]], 0xff, [[FIRST]]
     64 # SDWA: v_mov_b32_sdwa [[AND]], [[SECOND]] dst_sel:WORD_1 dst_unused:UNUSED_PRESERVE src0_sel:WORD_0
     65 
     66 # SDWA: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], [[AND]]
     67 
     68 name:            sdwa_preserve_keep
     69 tracksRegLiveness: true
     70 registers:
     71   - { id: 0, class: vreg_64 }
     72   - { id: 1, class: vreg_64 }
     73   - { id: 2, class: sreg_64 }
     74   - { id: 3, class: vgpr_32 }
     75   - { id: 4, class: vgpr_32 }
     76   - { id: 5, class: sreg_32_xm0_xexec }
     77   - { id: 6, class: vgpr_32 }
     78   - { id: 7, class: vgpr_32 }
     79   - { id: 8, class: sreg_32_xm0 }
     80   - { id: 9, class: vgpr_32 }
     81   - { id: 10, class: sreg_32_xm0 }
     82   - { id: 11, class: vgpr_32 }
     83   - { id: 17, class: vgpr_32 }
     84 body:             |
     85   bb.0:
     86     liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $sgpr30_sgpr31
     87 
     88     %2 = COPY $sgpr30_sgpr31
     89     %1 = COPY $vgpr2_vgpr3
     90     %0 = COPY $vgpr0_vgpr1
     91     %3 = FLAT_LOAD_DWORD %0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 4)
     92     %4 = FLAT_LOAD_DWORD %1, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 4)
     93 
     94     %9:vgpr_32 = V_LSHRREV_B16_e64 8, %3, implicit $exec
     95     %10:sreg_32_xm0 = S_MOV_B32 255
     96     %11:vgpr_32 = V_AND_B32_e64 %3, killed %10, implicit $exec
     97     %17:vgpr_32 = V_MOV_B32_sdwa 0, %4, 0, 5, 2, 4, implicit $exec, implicit %11(tied-def 0)
     98     FLAT_STORE_DWORD %0, %17, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4)
     99     S_ENDPGM
    100 
    101 ...
    102 ---
        # sdwa_preserve_remove: same shape as sdwa_preserve_keep, except that the mask
        # in %10 is 65535 instead of 255. %17 is again produced by a V_MOV_B32_sdwa
        # tied to %11 (the first load and-ed with the mask).
        # The expected output has the mov, printed with dst_sel:WORD_1
        # dst_unused:UNUSED_PRESERVE src0_sel:WORD_0, writing directly into the
        # register of the first load, and that register is what gets stored. No check
        # line mentions an and instruction.
        # NOTE(review): presumably the and is dropped because a 65535 mask leaves
        # the preserved low word unchanged -- confirm against the pass. The checks do
        # not explicitly forbid a leftover and; they only pin the mov destination.
        # NOTE(review): %9 is defined but not used by any other instruction in this
        # body, and register ids 5-8 are declared but never referenced.
    103 # SDWA-LABEL: sdwa_preserve_remove
    104 # SDWA: flat_load_dword [[FIRST:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}]
    105 # SDWA: flat_load_dword [[SECOND:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}]
    106 
    107 # SDWA: v_mov_b32_sdwa [[FIRST]], [[SECOND]] dst_sel:WORD_1 dst_unused:UNUSED_PRESERVE src0_sel:WORD_0
    108 
    109 # SDWA: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], [[FIRST]]
    110 
    111 name:            sdwa_preserve_remove
    112 tracksRegLiveness: true
    113 registers:
    114   - { id: 0, class: vreg_64 }
    115   - { id: 1, class: vreg_64 }
    116   - { id: 2, class: sreg_64 }
    117   - { id: 3, class: vgpr_32 }
    118   - { id: 4, class: vgpr_32 }
    119   - { id: 5, class: sreg_32_xm0_xexec }
    120   - { id: 6, class: vgpr_32 }
    121   - { id: 7, class: vgpr_32 }
    122   - { id: 8, class: sreg_32_xm0 }
    123   - { id: 9, class: vgpr_32 }
    124   - { id: 10, class: sreg_32_xm0 }
    125   - { id: 11, class: vgpr_32 }
    126   - { id: 17, class: vgpr_32 }
    127 body:             |
    128   bb.0:
    129     liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $sgpr30_sgpr31
    130 
    131     %2 = COPY $sgpr30_sgpr31
    132     %1 = COPY $vgpr2_vgpr3
    133     %0 = COPY $vgpr0_vgpr1
    134     %3 = FLAT_LOAD_DWORD %0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 4)
    135     %4 = FLAT_LOAD_DWORD %1, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 4)
    136 
    137     %9:vgpr_32 = V_LSHRREV_B16_e64 8, %3, implicit $exec
    138     %10:sreg_32_xm0 = S_MOV_B32 65535
    139     %11:vgpr_32 = V_AND_B32_e64 %3, killed %10, implicit $exec
    140     %17:vgpr_32 = V_MOV_B32_sdwa 0, %4, 0, 5, 2, 4, implicit $exec, implicit %11(tied-def 0)
    141     FLAT_STORE_DWORD %0, %17, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4)
    142     S_ENDPGM
    143 
    144 ...
    145