Home | History | Annotate | Download | only in AMDGPU
      1 # RUN: llc -march=amdgcn -mcpu=carrizo -verify-machineinstrs -run-pass  post-RA-hazard-rec %s -o - | FileCheck -check-prefixes=GCN,XNACK %s
      2 # RUN: llc -march=amdgcn -mcpu=fiji -verify-machineinstrs -run-pass  post-RA-hazard-rec %s -o - | FileCheck -check-prefixes=GCN,NOXNACK %s
      3 
      4 ---
      5 # Trivial clause at beginning of program
      6 name: trivial_smem_clause_load_smrd4_x1
      7 
      8 body: |
      9   bb.0:
     10     ; GCN-LABEL: name: trivial_smem_clause_load_smrd4_x1
     11     ; GCN: $sgpr0 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
     12     ; GCN-NEXT: S_ENDPGM
     13     $sgpr0 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
     14     S_ENDPGM
     15 ...
     16 ---
     17 # Trivial clause at beginning of program
     18 name: trivial_smem_clause_load_smrd4_x2
     19 
     20 body: |
     21   bb.0:
     22     ; GCN-LABEL: name: trivial_smem_clause_load_smrd4_x2
     23     ; GCN: $sgpr0 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
     24     ; GCN-NEXT: $sgpr1 = S_LOAD_DWORD_IMM $sgpr12_sgpr13, 0, 0
     25     ; GCN-NEXT: S_ENDPGM
     26     $sgpr0 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
     27     $sgpr1 = S_LOAD_DWORD_IMM $sgpr12_sgpr13, 0, 0
     28     S_ENDPGM
     29 ...
     30 ---
     31 # Trivial clause at beginning of program
     32 name: trivial_smem_clause_load_smrd4_x3
     33 
     34 body: |
     35   bb.0:
     36     ; GCN-LABEL: name: trivial_smem_clause_load_smrd4_x3
     37     ; GCN: $sgpr0 = S_LOAD_DWORD_IMM $sgpr12_sgpr13, 0, 0
     38     ; GCN-NEXT: $sgpr1 = S_LOAD_DWORD_IMM $sgpr6_sgpr7, 0, 0
     39     ; GCN-NEXT: $sgpr2 = S_LOAD_DWORD_IMM $sgpr14_sgpr15, 0, 0
     40     ; GCN-NEXT: S_ENDPGM
     41     $sgpr0 = S_LOAD_DWORD_IMM $sgpr12_sgpr13, 0, 0
     42     $sgpr1 = S_LOAD_DWORD_IMM $sgpr6_sgpr7, 0, 0
     43     $sgpr2 = S_LOAD_DWORD_IMM $sgpr14_sgpr15, 0, 0
     44     S_ENDPGM
     45 ...
     46 ---
     47 # Trivial clause at beginning of program
     48 name: trivial_smem_clause_load_smrd4_x4
     49 
     50 body: |
     51   bb.0:
     52     ; GCN-LABEL: name: trivial_smem_clause_load_smrd4_x4
     53     ; GCN: $sgpr0 = S_LOAD_DWORD_IMM $sgpr12_sgpr13, 0, 0
     54     ; GCN-NEXT: $sgpr1 = S_LOAD_DWORD_IMM $sgpr8_sgpr9, 0, 0
     55     ; GCN-NEXT: $sgpr2 = S_LOAD_DWORD_IMM $sgpr14_sgpr15, 0, 0
     56     ; GCN-NEXT: $sgpr3 = S_LOAD_DWORD_IMM $sgpr16_sgpr17, 0, 0
     57     ; GCN-NEXT: S_ENDPGM
     58     $sgpr0 = S_LOAD_DWORD_IMM $sgpr12_sgpr13, 0, 0
     59     $sgpr1 = S_LOAD_DWORD_IMM $sgpr8_sgpr9, 0, 0
     60     $sgpr2 = S_LOAD_DWORD_IMM $sgpr14_sgpr15, 0, 0
     61     $sgpr3 = S_LOAD_DWORD_IMM $sgpr16_sgpr17, 0, 0
     62     S_ENDPGM
     63 ...
     64 ---
     65 # Reuse of same input pointer is OK
     66 name: trivial_smem_clause_load_smrd4_x2_sameptr
     67 body: |
     68   bb.0:
     69     ; GCN-LABEL: name: trivial_smem_clause_load_smrd4_x2_sameptr
     70     ; GCN: $sgpr12 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
     71     ; GCN-NEXT: $sgpr13 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
     72     ; GCN-NEXT: S_ENDPGM
     73     $sgpr12 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
     74     $sgpr13 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
     75     S_ENDPGM
     76 ...
     77 ---
     78 # 32-bit load partially clobbers its own ptr reg
     79 name: smrd_load4_overwrite_ptr_lo
     80 
     81 body: |
     82   bb.0:
     83     ; GCN-LABEL: name: smrd_load4_overwrite_ptr_lo
     84     ; GCN: $sgpr10 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
     85     ; GCN-NEXT: S_ENDPGM
     86     $sgpr10 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
     87     S_ENDPGM
     88 ...
     89 ---
     90 # 32-bit load partially clobbers its own ptr reg
     91 name: smrd_load4_overwrite_ptr_hi
     92 
     93 body: |
     94   bb.0:
     95     ; GCN-LABEL: name: smrd_load4_overwrite_ptr_hi
     96     ; GCN: $sgpr11 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
     97     ; GCN-NEXT: S_ENDPGM
     98     $sgpr11 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
     99     S_ENDPGM
    100 ...
    101 ---
    102 # 64-bit load clobbers its own ptr reg
    103 name: smrd_load8_overwrite_ptr
    104 
    105 body: |
    106   bb.0:
    107     ; GCN-LABEL: name: smrd_load8_overwrite_ptr
    108     ; GCN: $sgpr10_sgpr11 = S_LOAD_DWORDX2_IMM $sgpr10_sgpr11, 0, 0
    109     ; GCN-NEXT: S_ENDPGM
    110     $sgpr10_sgpr11 = S_LOAD_DWORDX2_IMM $sgpr10_sgpr11, 0, 0
    111     S_ENDPGM
    112 ...
    113 ---
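    114 # 17 back-to-back SMEM loads with no def/use register overlap; no S_NOP is expected on either target. Original note: the counter has 4 bits, so maximum 16 outstanding loads, and the waitcnt
    115 # breaks the clause. NOTE(review): SMEM loads are presumably tracked by lgkmcnt rather than vmcnt, and no waitcnt appears in this test - confirm.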
    116 
    117 name: break_smem_clause_at_max_smem_clause_size_smrd_load4
    118 
    119 body: |
    120   bb.0:
    121     ; GCN-LABEL: name: break_smem_clause_at_max_smem_clause_size_smrd_load4
    122     ; GCN: $sgpr13 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
    123     ; GCN-NEXT: $sgpr14 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
    124     ; GCN-NEXT: $sgpr15 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
    125     ; GCN-NEXT: $sgpr16 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
    126     ; GCN-NEXT: $sgpr17 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
    127     ; GCN-NEXT: $sgpr18 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
    128     ; GCN-NEXT: $sgpr19 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
    129     ; GCN-NEXT: $sgpr20 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
    130     ; GCN-NEXT: $sgpr21 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
    131     ; GCN-NEXT: $sgpr22 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
    132     ; GCN-NEXT: $sgpr23 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
    133     ; GCN-NEXT: $sgpr24 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
    134     ; GCN-NEXT: $sgpr25 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
    135     ; GCN-NEXT: $sgpr26 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
    136     ; GCN-NEXT: $sgpr27 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
    137     ; GCN-NEXT: $sgpr28 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
    138     ; GCN-NEXT: $sgpr0 = S_LOAD_DWORD_IMM $sgpr30_sgpr31, 0, 0
    139     ; GCN-NEXT: $sgpr0 = S_MOV_B32 $sgpr0, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $sgpr16, implicit $sgpr17, implicit $sgpr18, implicit $sgpr19, implicit $sgpr20, implicit $sgpr21, implicit $sgpr22, implicit $sgpr23, implicit $sgpr24, implicit $sgpr25, implicit $sgpr26, implicit $sgpr27, implicit $sgpr28
    140     ; GCN-NEXT: S_ENDPGM
    141     $sgpr13 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
    142     $sgpr14 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
    143     $sgpr15 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
    144     $sgpr16 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
    145 
    146     $sgpr17 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
    147     $sgpr18 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
    148     $sgpr19 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
    149     $sgpr20 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
    150 
    151     $sgpr21 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
    152     $sgpr22 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
    153     $sgpr23 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
    154     $sgpr24 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
    155 
    156     $sgpr25 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
    157     $sgpr26 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
    158     $sgpr27 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
    159     $sgpr28 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
    160 
    161     $sgpr0 = S_LOAD_DWORD_IMM $sgpr30_sgpr31, 0, 0
    162     $sgpr0 = S_MOV_B32 $sgpr0, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $sgpr16, implicit $sgpr17, implicit $sgpr18, implicit $sgpr19, implicit $sgpr20, implicit $sgpr21, implicit $sgpr22, implicit $sgpr23, implicit $sgpr24, implicit $sgpr25, implicit $sgpr26, implicit $sgpr27, implicit $sgpr28
    163     S_ENDPGM
    164 ...
    165 ---
    166 # Each load overwrites the low half of its own pointer pair ($sgpr10, then $sgpr12); XNACK targets expect an S_NOP 0 between the two loads.
    167 name: break_smem_clause_simple_load_smrd4_lo_ptr
    168 
    169 body: |
    170   bb.0:
    171     ; GCN-LABEL: name: break_smem_clause_simple_load_smrd4_lo_ptr
    172     ; GCN: $sgpr10 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
    173     ; XNACK-NEXT: S_NOP 0
    174     ; GCN-NEXT: $sgpr12 = S_LOAD_DWORD_IMM $sgpr12_sgpr13, 0, 0
    175     ; GCN-NEXT: S_ENDPGM
    176     $sgpr10 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
    177     $sgpr12 = S_LOAD_DWORD_IMM $sgpr12_sgpr13, 0, 0
    178     S_ENDPGM
    179 ...
    180 ---
    181 # Two loads into $sgpr0 and $sgpr3, neither of which is part of either load's pointer pair; no S_NOP is expected. NOTE(review): the name suggests a high-half pointer clobber - confirm the registers.
    182 name: break_smem_clause_simple_load_smrd4_hi_ptr
    183 
    184 body: |
    185   bb.0:
    186     ; GCN-LABEL: name: break_smem_clause_simple_load_smrd4_hi_ptr
    187     ; GCN: $sgpr0 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
    188     ; GCN-NEXT: $sgpr3 = S_LOAD_DWORD_IMM $sgpr12_sgpr13, 0, 0
    189     ; GCN-NEXT: S_ENDPGM
    190     $sgpr0 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
    191     $sgpr3 = S_LOAD_DWORD_IMM $sgpr12_sgpr13, 0, 0
    192     S_ENDPGM
    193 ...
    194 ---
    195 # The second 64-bit load overwrites $sgpr10_sgpr11, the pointer of the first load; XNACK targets expect an S_NOP 0 between them.
    196 name: break_smem_clause_simple_load_smrd8_ptr
    197 
    198 body: |
    199   bb.0:
    200     ; GCN-LABEL: name: break_smem_clause_simple_load_smrd8_ptr
    201     ; GCN: $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM $sgpr10_sgpr11, 0, 0
    202     ; XNACK-NEXT: S_NOP 0
    203     ; GCN-NEXT: $sgpr10_sgpr11 = S_LOAD_DWORDX2_IMM $sgpr12_sgpr13, 0, 0
    204     ; GCN-NEXT: S_ENDPGM
    205     $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM $sgpr10_sgpr11, 0, 0
    206     $sgpr10_sgpr11 = S_LOAD_DWORDX2_IMM $sgpr12_sgpr13, 0, 0
    207     S_ENDPGM
    208 ...
    209 ---
    210 # A 64-bit load followed by a 128-bit load into $sgpr12-$sgpr15, with no register shared between the two; no S_NOP is expected. NOTE(review): the name suggests a pointer clobber - confirm the registers.
    211 name: break_smem_clause_simple_load_smrd16_ptr
    212 
    213 body: |
    214   bb.0:
    215     ; GCN-LABEL: name: break_smem_clause_simple_load_smrd16_ptr
    216     ; GCN: $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM $sgpr10_sgpr11, 0, 0
    217     ; GCN-NEXT: $sgpr12_sgpr13_sgpr14_sgpr15 = S_LOAD_DWORDX4_IMM $sgpr6_sgpr7, 0, 0
    218     ; GCN-NEXT: S_ENDPGM
    219     $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM $sgpr10_sgpr11, 0, 0
    220     $sgpr12_sgpr13_sgpr14_sgpr15 = S_LOAD_DWORDX4_IMM $sgpr6_sgpr7, 0, 0
    221     S_ENDPGM
    222 ...
    223 ---
    224 # The load in bb.1 overwrites $sgpr10_sgpr11, the pointer of the load in bb.0; XNACK targets expect an S_NOP 0 at the top of bb.1 even though the loads are in different blocks.
    225 name: break_smem_clause_block_boundary_load_smrd8_ptr
    226 
    227 body: |
    228   ; GCN-LABEL: name: break_smem_clause_block_boundary_load_smrd8_ptr
    229   ; GCN: bb.0:
    230   ; GCN:   successors: %bb.1(0x80000000)
    231   ; GCN:   $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM $sgpr10_sgpr11, 0, 0
    232   ; GCN: bb.1:
    233   ; XNACK-NEXT:   S_NOP 0
    234   ; GCN-NEXT:   $sgpr10_sgpr11 = S_LOAD_DWORDX2_IMM $sgpr12_sgpr13, 0, 0
    235   ; GCN-NEXT:   S_ENDPGM
    236   bb.0:
    237     $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM $sgpr10_sgpr11, 0, 0
    238 
    239   bb.1:
    240     $sgpr10_sgpr11 = S_LOAD_DWORDX2_IMM $sgpr12_sgpr13, 0, 0
    241     S_ENDPGM
    242 ...
    243 ---
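    244 # Store followed by a load. NOTE(review): this comment used to say "The load clobbers the pointer of the store, so it needs to break", but the load writes $sgpr12 while the store pointer is $sgpr10_sgpr11, and the checks expect no S_NOP - confirm intent.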
    245 
    246 name: break_smem_clause_store_load_into_ptr_smrd4
    247 
    248 body: |
    249   bb.0:
    250     ; GCN-LABEL: name: break_smem_clause_store_load_into_ptr_smrd4
    251     ; GCN: S_STORE_DWORD_IMM $sgpr16, $sgpr10_sgpr11, 0, 0
    252     ; GCN-NEXT: $sgpr12 = S_LOAD_DWORD_IMM $sgpr14_sgpr15, 0, 0
    253     ; GCN-NEXT: S_ENDPGM
    254     S_STORE_DWORD_IMM $sgpr16, $sgpr10_sgpr11, 0, 0
    255     $sgpr12 = S_LOAD_DWORD_IMM $sgpr14_sgpr15, 0, 0
    256     S_ENDPGM
    257 ...
    258 ---
    259 # The load overwrites $sgpr8, the data register of the preceding store, so the clause needs to break; the checks currently expect no S_NOP on either target.
    260 # FIXME: Would it be better to s_nop and wait later?
    261 
    262 name: break_smem_clause_store_load_into_data_smrd4
    263 
    264 body: |
    265   bb.0:
    266     ; GCN-LABEL: name: break_smem_clause_store_load_into_data_smrd4
    267     ; GCN: S_STORE_DWORD_IMM $sgpr8, $sgpr10_sgpr11, 0, 0
    268     ; GCN-NEXT: $sgpr8 = S_LOAD_DWORD_IMM $sgpr12_sgpr13, 0, 0
    269     ; GCN-NEXT: S_ENDPGM
    270     S_STORE_DWORD_IMM $sgpr8, $sgpr10_sgpr11, 0, 0
    271     $sgpr8 = S_LOAD_DWORD_IMM $sgpr12_sgpr13, 0, 0
    272     S_ENDPGM
    273 ...
    274 ---
    275 # Regular VALU instruction breaks clause, no nop needed
    276 name: valu_inst_breaks_smem_clause
    277 
    278 body: |
    279   bb.0:
    280     ; GCN-LABEL: name: valu_inst_breaks_smem_clause
    281     ; GCN: $sgpr0 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
    282     ; GCN-NEXT: $vgpr8 = V_MOV_B32_e32 0, implicit $exec
    283     ; GCN-NEXT: $sgpr2 = S_LOAD_DWORD_IMM $sgpr12_sgpr13, 0, 0
    284     ; GCN-NEXT: S_ENDPGM
    285     $sgpr0 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
    286     $vgpr8 = V_MOV_B32_e32 0, implicit $exec
    287     $sgpr2 = S_LOAD_DWORD_IMM $sgpr12_sgpr13, 0, 0
    288     S_ENDPGM
    289 ...
    290 ---
    291 # Regular SALU instruction breaks clause, no nop needed
    292 name: salu_inst_breaks_smem_clause
    293 
    294 body: |
    295   bb.0:
    296     ; GCN-LABEL: name: salu_inst_breaks_smem_clause
    297     ; GCN: $sgpr0 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
    298     ; GCN-NEXT: $sgpr8 = S_MOV_B32 0
    299     ; GCN-NEXT: $sgpr2 = S_LOAD_DWORD_IMM $sgpr12_sgpr13, 0, 0
    300     ; GCN-NEXT: S_ENDPGM
    301     $sgpr0 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
    302     $sgpr8 = S_MOV_B32 0
    303     $sgpr2 = S_LOAD_DWORD_IMM $sgpr12_sgpr13, 0, 0
    304     S_ENDPGM
    305 ...
    306 ---
    307 name: ds_inst_breaks_smem_clause
    308 # A DS instruction sits between the two SMEM loads; no S_NOP is expected on either target.
    309 body: |
    310   bb.0:
    311     ; GCN-LABEL: name: ds_inst_breaks_smem_clause
    312     ; GCN: $sgpr0 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
    313     ; GCN-NEXT: $vgpr8 = DS_READ_B32 $vgpr9, 0, 0, implicit $m0, implicit $exec
    314     ; GCN-NEXT: $sgpr2 = S_LOAD_DWORD_IMM $sgpr12_sgpr13, 0, 0
    315     ; GCN-NEXT: S_ENDPGM
    316     $sgpr0 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
    317     $vgpr8 = DS_READ_B32 $vgpr9, 0, 0, implicit $m0, implicit $exec
    318     $sgpr2 = S_LOAD_DWORD_IMM $sgpr12_sgpr13, 0, 0
    319     S_ENDPGM
    320 ...
    321 ---
    322 # A FLAT load sits between the two SMEM loads; no S_NOP is expected on either target.
    323 name: flat_inst_breaks_smem_clause
    324 
    325 body: |
    326   bb.0:
    327     ; GCN-LABEL: name: flat_inst_breaks_smem_clause
    328     ; GCN: $sgpr0 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
    329     ; GCN-NEXT: $vgpr0 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, 0, implicit $exec, implicit $flat_scr
    330     ; GCN-NEXT: $sgpr2 = S_LOAD_DWORD_IMM $sgpr12_sgpr13, 0, 0
    331     ; GCN-NEXT: S_ENDPGM
    332     $sgpr0 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
    333     $vgpr0 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, 0, implicit $exec, implicit $flat_scr
    334     $sgpr2 = S_LOAD_DWORD_IMM $sgpr12_sgpr13, 0, 0
    335     S_ENDPGM
    336 ...
    337 ---
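    338 # The first load only has an implicit use of $sgpr12_sgpr13, which the second load overwrites; XNACK targets are still expected to get an S_NOP 0. FIXME: Should this be handled?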
    339 name: implicit_use_breaks_smem_clause
    340 
    341 body: |
    342   bb.0:
    343     ; GCN-LABEL: name: implicit_use_breaks_smem_clause
    344     ; GCN: $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM $sgpr10_sgpr11, 0, 0, implicit $sgpr12_sgpr13
    345     ; XNACK-NEXT: S_NOP 0
    346     ; GCN-NEXT: $sgpr12_sgpr13 = S_LOAD_DWORDX2_IMM $sgpr6_sgpr7, 0, 0
    347     ; GCN-NEXT: S_ENDPGM
    348     $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM $sgpr10_sgpr11, 0, 0, implicit $sgpr12_sgpr13
    349     $sgpr12_sgpr13 = S_LOAD_DWORDX2_IMM $sgpr6_sgpr7, 0, 0
    350     S_ENDPGM
    351 ...
    352