; RUN: llc -O0 -march=amdgcn -mcpu=fiji -amdgpu-spill-sgpr-to-smem=0 -verify-machineinstrs < %s | FileCheck -check-prefix=ALL -check-prefix=VGPR %s
; RUN: llc -O0 -march=amdgcn -mcpu=fiji -amdgpu-spill-sgpr-to-smem=1 -verify-machineinstrs < %s | FileCheck -check-prefix=ALL -check-prefix=SMEM %s
; RUN: llc -O0 -march=amdgcn -mcpu=fiji -amdgpu-spill-sgpr-to-smem=0 -amdgpu-spill-sgpr-to-vgpr=0 -verify-machineinstrs < %s | FileCheck -check-prefix=ALL -check-prefix=VMEM %s

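; Each function defines a wide SGPR tuple with inline asm that must stay live
; across the branch on %in, forcing a spill and reload at -O0. The three RUN
; lines cover the three SGPR spill strategies: scalar stores to scratch (SMEM),
; lane spills into a VGPR (VGPR), and ordinary buffer stores to scratch (VMEM).
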
; ALL-LABEL: {{^}}spill_sgpr_x2:
; SMEM: s_add_u32 m0, s3, 0x100{{$}}
; SMEM: s_buffer_store_dwordx2 s{{\[[0-9]+:[0-9]+\]}}, s[8:11], m0 ; 8-byte Folded Spill
; SMEM: s_cbranch_scc1

; SMEM: s_add_u32 m0, s3, 0x100{{$}}
; SMEM: s_buffer_load_dwordx2 s{{\[[0-9]+:[0-9]+\]}}, s[8:11], m0 ; 8-byte Folded Reload

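; s_dcache_wb writes dirty scalar cache lines back to memory before the kernel
; ends.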
; SMEM: s_dcache_wb
; SMEM: s_endpgm

; FIXME: Should only need 4 bytes
; SMEM: ScratchSize: 12

; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 0
; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 1
; VGPR: s_cbranch_scc1

; VGPR: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, 0
; VGPR: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, 1

; VMEM: buffer_store_dword
; VMEM: buffer_store_dword
; VMEM: s_cbranch_scc1

; VMEM: buffer_load_dword
; VMEM: buffer_load_dword
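; The sideeffect "=s" asm def pins %wide.sgpr to SGPRs and cannot be folded
; away, so the value has to be spilled across the branch.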
define amdgpu_kernel void @spill_sgpr_x2(i32 addrspace(1)* %out, i32 %in) #0 {
  %wide.sgpr = call <2 x i32> asm sideeffect "; def $0", "=s" () #0
  %cmp = icmp eq i32 %in, 0
  br i1 %cmp, label %bb0, label %ret

bb0:
  call void asm sideeffect "; use $0", "s"(<2 x i32> %wide.sgpr) #0
  br label %ret

ret:
  ret void
}

; ALL-LABEL: {{^}}spill_sgpr_x4:
; SMEM: s_add_u32 m0, s3, 0x100{{$}}
; SMEM: s_buffer_store_dwordx4 s{{\[[0-9]+:[0-9]+\]}}, s[12:15], m0 ; 16-byte Folded Spill
; SMEM: s_cbranch_scc1

; SMEM: s_add_u32 m0, s3, 0x100{{$}}
; SMEM: s_buffer_load_dwordx4 s{{\[[0-9]+:[0-9]+\]}}, s[12:15], m0 ; 16-byte Folded Reload
; SMEM: s_dcache_wb
; SMEM: s_endpgm

; FIXME: Should only need 4 bytes
; SMEM: ScratchSize: 20

; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 0
; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 1
; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 2
; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 3
; VGPR: s_cbranch_scc1

; VGPR: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, 0
; VGPR: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, 1
; VGPR: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, 2
; VGPR: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, 3

; VMEM: buffer_store_dword
; VMEM: buffer_store_dword
; VMEM: buffer_store_dword
; VMEM: buffer_store_dword
; VMEM: s_cbranch_scc1

; VMEM: buffer_load_dword
; VMEM: buffer_load_dword
; VMEM: buffer_load_dword
; VMEM: buffer_load_dword
define amdgpu_kernel void @spill_sgpr_x4(i32 addrspace(1)* %out, i32 %in) #0 {
  %wide.sgpr = call <4 x i32> asm sideeffect "; def $0", "=s" () #0
  %cmp = icmp eq i32 %in, 0
  br i1 %cmp, label %bb0, label %ret

bb0:
  call void asm sideeffect "; use $0", "s"(<4 x i32> %wide.sgpr) #0
  br label %ret

ret:
  ret void
}

; ALL-LABEL: {{^}}spill_sgpr_x8:

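; The 8-dword tuple is wider than the largest scalar spill store, so it is
; split into two dwordx4 spills 16 bytes apart (0x100 and 0x110).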
; SMEM: s_add_u32 m0, s3, 0x100{{$}}
; SMEM: s_buffer_store_dwordx4 s{{\[[0-9]+:[0-9]+\]}}, s[16:19], m0 ; 16-byte Folded Spill
; SMEM: s_add_u32 m0, s3, 0x110{{$}}
; SMEM: s_buffer_store_dwordx4 s{{\[[0-9]+:[0-9]+\]}}, s[16:19], m0 ; 16-byte Folded Spill
; SMEM: s_cbranch_scc1

; SMEM: s_add_u32 m0, s3, 0x100{{$}}
; SMEM: s_buffer_load_dwordx4 s{{\[[0-9]+:[0-9]+\]}}, s[16:19], m0 ; 16-byte Folded Reload
; SMEM: s_add_u32 m0, s3, 0x110{{$}}
; SMEM: s_buffer_load_dwordx4 s{{\[[0-9]+:[0-9]+\]}}, s[16:19], m0 ; 16-byte Folded Reload

; SMEM: s_dcache_wb
; SMEM: s_endpgm

; SMEM: ScratchSize: 36

; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 0
; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 1
; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 2
; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 3
; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 4
; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 5
; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 6
; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 7
; VGPR: s_cbranch_scc1

; VGPR: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, 0
; VGPR: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, 1
; VGPR: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, 2
; VGPR: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, 3
; VGPR: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, 4
; VGPR: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, 5
; VGPR: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, 6
; VGPR: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, 7

; VMEM: buffer_store_dword
; VMEM: buffer_store_dword
; VMEM: buffer_store_dword
; VMEM: buffer_store_dword
; VMEM: buffer_store_dword
; VMEM: buffer_store_dword
; VMEM: buffer_store_dword
; VMEM: buffer_store_dword
; VMEM: s_cbranch_scc1

; VMEM: buffer_load_dword
; VMEM: buffer_load_dword
; VMEM: buffer_load_dword
; VMEM: buffer_load_dword
; VMEM: buffer_load_dword
; VMEM: buffer_load_dword
; VMEM: buffer_load_dword
; VMEM: buffer_load_dword
define amdgpu_kernel void @spill_sgpr_x8(i32 addrspace(1)* %out, i32 %in) #0 {
  %wide.sgpr = call <8 x i32> asm sideeffect "; def $0", "=s" () #0
  %cmp = icmp eq i32 %in, 0
  br i1 %cmp, label %bb0, label %ret

bb0:
  call void asm sideeffect "; use $0", "s"(<8 x i32> %wide.sgpr) #0
  br label %ret

ret:
  ret void
}

; FIXME: x16 inline asm seems broken
; define amdgpu_kernel void @spill_sgpr_x16(i32 addrspace(1)* %out, i32 %in) #0 {
;   %wide.sgpr = call <16 x i32> asm sideeffect "; def $0", "=s" () #0
;   %cmp = icmp eq i32 %in, 0
;   br i1 %cmp, label %bb0, label %ret

; bb0:
;   call void asm sideeffect "; use $0", "s"(<16 x i32> %wide.sgpr) #0
;   br label %ret

; ret:
;   ret void
; }

attributes #0 = { nounwind }