Home | History | Annotate | Download | only in AMDGPU
      1 ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -enable-misched=0 -post-RA-scheduler=0 -stress-regalloc=8 < %s | FileCheck %s
      2 
      3 ; Test that the VGPR spiller correctly switches to SGPR offsets when the
      4 ; instruction offset field would overflow, and that it accounts for memory
      5 ; swizzling.
      6 
      7 ; CHECK-LABEL: test_inst_offset_kernel
      8 define amdgpu_kernel void @test_inst_offset_kernel() {
      9 entry:
     10   ; Kernel case, one-dword spill. Occupy 4092 bytes of scratch (a 4088-byte alloca; NOTE(review): presumably plus
     11   ; 4 bytes placed below it - confirm), so the spill of %a lands at offset 4092 and just fits in the offset field.
     12   %alloca = alloca i8, i32 4088, align 4, addrspace(5)
     13   %buf = bitcast i8 addrspace(5)* %alloca to i32 addrspace(5)*
     14 
     15   %aptr = getelementptr i32, i32 addrspace(5)* %buf, i32 1 ; second dword of the buffer
     16   ; CHECK: buffer_store_dword v{{[0-9]+}}, off, s[{{[0-9]+:[0-9]+}}], s{{[0-9]+}} offset:4092 ; 4-byte Folded Spill
     17   %a = load volatile i32, i32 addrspace(5)* %aptr
     18 
     19   ; Force %a to spill: the empty asm clobbers v0-v7 while %a is still live.
     20   call void asm sideeffect "", "~{v0},~{v1},~{v2},~{v3},~{v4},~{v5},~{v6},~{v7}" ()
     21 
     22   %outptr = getelementptr i32, i32 addrspace(5)* %buf, i32 1
     23   store volatile i32 %a, i32 addrspace(5)* %outptr ; use of %a after the clobber keeps it live across the asm
     24 
     25   ret void
     26 }
     27 
     28 ; CHECK-LABEL: test_sgpr_offset_kernel
     29 define amdgpu_kernel void @test_sgpr_offset_kernel() {
     30 entry:
     31   ; Kernel case, one-dword spill. Occupy 4096 bytes of scratch (a 4092-byte alloca; NOTE(review): presumably plus
     32   ; 4 bytes placed below it - confirm), so the spill offset of %a no longer fits and has to live in the SGPR offset.
     33   %alloca = alloca i8, i32 4092, align 4, addrspace(5)
     34   %buf = bitcast i8 addrspace(5)* %alloca to i32 addrspace(5)*
     35 
     36   %aptr = getelementptr i32, i32 addrspace(5)* %buf, i32 1 ; second dword of the buffer
     37   ; Expect s7 to be bumped before the store and restored after it; 0x40000 / 64 = 4096 (for wave64)
     38   ; CHECK: s_add_u32 s7, s7, 0x40000
     39   ; CHECK: buffer_store_dword v{{[0-9]+}}, off, s[{{[0-9]+:[0-9]+}}], s7 ; 4-byte Folded Spill
     40   ; CHECK: s_sub_u32 s7, s7, 0x40000
     41   %a = load volatile i32, i32 addrspace(5)* %aptr
     42 
     43   ; Force %a to spill: the empty asm clobbers v0-v7 while %a is still live.
     44   call void asm sideeffect "", "~{v0},~{v1},~{v2},~{v3},~{v4},~{v5},~{v6},~{v7}" ()
     45 
     46   %outptr = getelementptr i32, i32 addrspace(5)* %buf, i32 1
     47   store volatile i32 %a, i32 addrspace(5)* %outptr ; use of %a after the clobber keeps it live across the asm
     48 
     49   ret void
     50 }
     51 
     52 ; CHECK-LABEL: test_sgpr_offset_subregs_kernel
     53 define amdgpu_kernel void @test_sgpr_offset_subregs_kernel() {
     54 entry:
     55   ; Kernel case, two-dword spill. Occupy 4088 bytes of scratch (a 4084-byte alloca; NOTE(review): presumably plus
     56   ; 4 bytes placed below it - confirm), so the last subreg of %a still fits below offset 4096 (4088 + 8 - 4 = 4092).
     57   ; NOTE(review): despite "sgpr_offset" in the name, both stores are expected to use the instruction offset field.
     58   %alloca = alloca i8, i32 4084, align 4, addrspace(5)
     59   %bufv1 = bitcast i8 addrspace(5)* %alloca to i32 addrspace(5)*
     60   %bufv2 = bitcast i8 addrspace(5)* %alloca to <2 x i32> addrspace(5)*
     61 
     62   ; CHECK: buffer_store_dword v{{[0-9]+}}, off, s[{{[0-9]+:[0-9]+}}], s{{[0-9]+}} offset:4088 ; 4-byte Folded Spill
     63   ; CHECK: buffer_store_dword v{{[0-9]+}}, off, s[{{[0-9]+:[0-9]+}}], s{{[0-9]+}} offset:4092 ; 4-byte Folded Spill
     64   %aptr = getelementptr <2 x i32>, <2 x i32> addrspace(5)* %bufv2, i32 1 ; second <2 x i32> element of the buffer
     65   %a = load volatile <2 x i32>, <2 x i32> addrspace(5)* %aptr
     66 
     67   ; Force %a to spill: the empty asm clobbers v0-v7 while %a is still live.
     68   call void asm sideeffect "", "~{v0},~{v1},~{v2},~{v3},~{v4},~{v5},~{v6},~{v7}" ()
     69 
     70   ; Ensure the alloca sticks around: a volatile load through the i32 view; %b itself is never used.
     71   %bptr = getelementptr i32, i32 addrspace(5)* %bufv1, i32 1
     72   %b = load volatile i32, i32 addrspace(5)* %bptr
     73 
     74   ; Ensure the spill is of the full super-reg: the whole <2 x i32> value is passed as one asm operand.
     75   call void asm sideeffect "; $0", "r"(<2 x i32> %a)
     76 
     77   ret void
     78 }
     79 
     80 ; CHECK-LABEL: test_inst_offset_subregs_kernel
     81 define amdgpu_kernel void @test_inst_offset_subregs_kernel() {
     82 entry:
     83   ; Kernel case, two-dword spill. Occupy 4092 bytes of scratch (a 4088-byte alloca; NOTE(review): presumably plus
     84   ; 4 bytes placed below it - confirm), so the last subreg of %a does not fit below 4096 (4092 + 8 - 4 = 4096).
     85   ; NOTE(review): despite "inst_offset" in the name, the spill is expected to go through the SGPR offset.
     86   %alloca = alloca i8, i32 4088, align 4, addrspace(5)
     87   %bufv1 = bitcast i8 addrspace(5)* %alloca to i32 addrspace(5)*
     88   %bufv2 = bitcast i8 addrspace(5)* %alloca to <2 x i32> addrspace(5)*
     89 
     90   ; Expect s7 bumped once for both stores (second one adds offset:4) and restored after; 0x3ff00 / 64 = 4092 (for wave64)
     91   ; CHECK: s_add_u32 s7, s7, 0x3ff00
     92   ; CHECK: buffer_store_dword v{{[0-9]+}}, off, s[{{[0-9]+:[0-9]+}}], s7 ; 4-byte Folded Spill
     93   ; CHECK: buffer_store_dword v{{[0-9]+}}, off, s[{{[0-9]+:[0-9]+}}], s7 offset:4 ; 4-byte Folded Spill
     94   ; CHECK: s_sub_u32 s7, s7, 0x3ff00
     95   %aptr = getelementptr <2 x i32>, <2 x i32> addrspace(5)* %bufv2, i32 1 ; second <2 x i32> element of the buffer
     96   %a = load volatile <2 x i32>, <2 x i32> addrspace(5)* %aptr
     97 
     98   ; Force %a to spill: the empty asm clobbers v0-v7 while %a is still live.
     99   call void asm sideeffect "", "~{v0},~{v1},~{v2},~{v3},~{v4},~{v5},~{v6},~{v7}" ()
    100 
    101   ; Ensure the alloca sticks around: a volatile load through the i32 view; %b itself is never used.
    102   %bptr = getelementptr i32, i32 addrspace(5)* %bufv1, i32 1
    103   %b = load volatile i32, i32 addrspace(5)* %bptr
    104 
    105   ; Ensure the spill is of the full super-reg: the whole <2 x i32> value is passed as one asm operand.
    106   call void asm sideeffect "; $0", "r"(<2 x i32> %a)
    107 
    108   ret void
    109 }
    110 
    111 ; CHECK-LABEL: test_inst_offset_function
    112 define void @test_inst_offset_function() {
    113 entry:
    114   ; Non-kernel case, one-dword spill. Occupy 4092 bytes of scratch (a 4088-byte alloca; NOTE(review): presumably
    115   ; plus 4 bytes placed below it - confirm), so the spill of %a lands at offset 4092 and just fits in the offset field.
    116   %alloca = alloca i8, i32 4088, align 4, addrspace(5)
    117   %buf = bitcast i8 addrspace(5)* %alloca to i32 addrspace(5)*
    118 
    119   %aptr = getelementptr i32, i32 addrspace(5)* %buf, i32 1 ; second dword of the buffer
    120   ; CHECK: buffer_store_dword v{{[0-9]+}}, off, s[{{[0-9]+:[0-9]+}}], s{{[0-9]+}} offset:4092 ; 4-byte Folded Spill
    121   %a = load volatile i32, i32 addrspace(5)* %aptr
    122 
    123   ; Force %a to spill: the empty asm clobbers v0-v7 while %a is still live.
    124   call void asm sideeffect "", "~{v0},~{v1},~{v2},~{v3},~{v4},~{v5},~{v6},~{v7}" ()
    125 
    126   %outptr = getelementptr i32, i32 addrspace(5)* %buf, i32 1
    127   store volatile i32 %a, i32 addrspace(5)* %outptr ; use of %a after the clobber keeps it live across the asm
    128 
    129   ret void
    130 }
    131 
    132 ; CHECK-LABEL: test_sgpr_offset_function
    133 define void @test_sgpr_offset_function() {
    134 entry:
    135   ; Non-kernel case, one-dword spill. Occupy 4096 bytes of scratch (a 4092-byte alloca; NOTE(review): presumably plus
    136   ; 4 bytes placed below it - confirm), so the spill offset of %a no longer fits and has to live in the SGPR offset.
    137   %alloca = alloca i8, i32 4092, align 4, addrspace(5)
    138   %buf = bitcast i8 addrspace(5)* %alloca to i32 addrspace(5)*
    139 
    140   %aptr = getelementptr i32, i32 addrspace(5)* %buf, i32 1 ; second dword of the buffer
    141   ; Expect s5 to be bumped before the store and restored after it; 0x40000 / 64 = 4096 (for wave64)
    142   ; CHECK: s_add_u32 s5, s5, 0x40000
    143   ; CHECK: buffer_store_dword v{{[0-9]+}}, off, s[{{[0-9]+:[0-9]+}}], s5 ; 4-byte Folded Spill
    144   ; CHECK: s_sub_u32 s5, s5, 0x40000
    145   %a = load volatile i32, i32 addrspace(5)* %aptr
    146 
    147   ; Force %a to spill: the empty asm clobbers v0-v7 while %a is still live.
    148   call void asm sideeffect "", "~{v0},~{v1},~{v2},~{v3},~{v4},~{v5},~{v6},~{v7}" ()
    149 
    150   %outptr = getelementptr i32, i32 addrspace(5)* %buf, i32 1
    151   store volatile i32 %a, i32 addrspace(5)* %outptr ; use of %a after the clobber keeps it live across the asm
    152 
    153   ret void
    154 }
    155 
    156 ; CHECK-LABEL: test_sgpr_offset_subregs_function
    157 define void @test_sgpr_offset_subregs_function() {
    158 entry:
    159   ; Non-kernel case, two-dword spill. Occupy 4088 bytes of scratch (a 4084-byte alloca; NOTE(review): presumably plus
    160   ; 4 bytes placed below it - confirm), so the last subreg of %a still fits below offset 4096 (4088 + 8 - 4 = 4092).
    161   ; NOTE(review): despite "sgpr_offset" in the name, both stores are expected to use the instruction offset field.
    162   %alloca = alloca i8, i32 4084, align 4, addrspace(5)
    163   %bufv1 = bitcast i8 addrspace(5)* %alloca to i32 addrspace(5)*
    164   %bufv2 = bitcast i8 addrspace(5)* %alloca to <2 x i32> addrspace(5)*
    165 
    166   ; CHECK: buffer_store_dword v{{[0-9]+}}, off, s[{{[0-9]+:[0-9]+}}], s{{[0-9]+}} offset:4088 ; 4-byte Folded Spill
    167   ; CHECK: buffer_store_dword v{{[0-9]+}}, off, s[{{[0-9]+:[0-9]+}}], s{{[0-9]+}} offset:4092 ; 4-byte Folded Spill
    168   %aptr = getelementptr <2 x i32>, <2 x i32> addrspace(5)* %bufv2, i32 1 ; second <2 x i32> element of the buffer
    169   %a = load volatile <2 x i32>, <2 x i32> addrspace(5)* %aptr
    170 
    171   ; Force %a to spill: the empty asm clobbers v0-v7 while %a is still live.
    172   call void asm sideeffect "", "~{v0},~{v1},~{v2},~{v3},~{v4},~{v5},~{v6},~{v7}" ()
    173 
    174   ; Ensure the alloca sticks around: a volatile load through the i32 view; %b itself is never used.
    175   %bptr = getelementptr i32, i32 addrspace(5)* %bufv1, i32 1
    176   %b = load volatile i32, i32 addrspace(5)* %bptr
    177 
    178   ; Ensure the spill is of the full super-reg: the whole <2 x i32> value is passed as one asm operand.
    179   call void asm sideeffect "; $0", "r"(<2 x i32> %a)
    180 
    181   ret void
    182 }
    183 
    184 ; CHECK-LABEL: test_inst_offset_subregs_function
    185 define void @test_inst_offset_subregs_function() {
    186 entry:
    187   ; Non-kernel case, two-dword spill. Occupy 4092 bytes of scratch (a 4088-byte alloca; NOTE(review): presumably plus
    188   ; 4 bytes placed below it - confirm), so the last subreg of %a does not fit below 4096 (4092 + 8 - 4 = 4096).
    189   ; NOTE(review): despite "inst_offset" in the name, the spill is expected to go through the SGPR offset.
    190   %alloca = alloca i8, i32 4088, align 4, addrspace(5)
    191   %bufv1 = bitcast i8 addrspace(5)* %alloca to i32 addrspace(5)*
    192   %bufv2 = bitcast i8 addrspace(5)* %alloca to <2 x i32> addrspace(5)*
    193 
    194   ; Expect s5 bumped once for both stores (second one adds offset:4) and restored after; 0x3ff00 / 64 = 4092 (for wave64)
    195   ; CHECK: s_add_u32 s5, s5, 0x3ff00
    196   ; CHECK: buffer_store_dword v{{[0-9]+}}, off, s[{{[0-9]+:[0-9]+}}], s5 ; 4-byte Folded Spill
    197   ; CHECK: buffer_store_dword v{{[0-9]+}}, off, s[{{[0-9]+:[0-9]+}}], s5 offset:4 ; 4-byte Folded Spill
    198   ; CHECK: s_sub_u32 s5, s5, 0x3ff00
    199   %aptr = getelementptr <2 x i32>, <2 x i32> addrspace(5)* %bufv2, i32 1 ; second <2 x i32> element of the buffer
    200   %a = load volatile <2 x i32>, <2 x i32> addrspace(5)* %aptr
    201 
    202   ; Force %a to spill: the empty asm clobbers v0-v7 while %a is still live.
    203   call void asm sideeffect "", "~{v0},~{v1},~{v2},~{v3},~{v4},~{v5},~{v6},~{v7}" ()
    204 
    205   ; Ensure the alloca sticks around: a volatile load through the i32 view; %b itself is never used.
    206   %bptr = getelementptr i32, i32 addrspace(5)* %bufv1, i32 1
    207   %b = load volatile i32, i32 addrspace(5)* %bptr
    208 
    209   ; Ensure the spill is of the full super-reg: the whole <2 x i32> value is passed as one asm operand.
    210   call void asm sideeffect "; $0", "r"(<2 x i32> %a)
    211 
    212   ret void
    213 }
    214