Home | History | Annotate | Download | only in AMDGPU
      1 ; RUN: llc -march=amdgcn -mattr=-promote-alloca -amdgpu-sroa=0 -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
      2 
      3 ; GCN-LABEL: {{^}}stored_fi_to_lds:
        ; Captures the address of a private stack slot by storing it through an
        ; addrspace(3) pointer argument (written with ds_write_b32 below).
        ; The function has a single alloca; the checks expect the zero register
        ; to serve as the scratch-store address and a second zero to be the
        ; value written through the loaded pointer.
      4 ; GCN: s_load_dword [[LDSPTR:s[0-9]+]]
      5 ; GCN: v_mov_b32_e32 [[ZERO1:v[0-9]+]], 0{{$}}
      6 ; GCN: buffer_store_dword v{{[0-9]+}}, [[ZERO1]]
      7 ; GCN: v_mov_b32_e32 [[ZERO0:v[0-9]+]], 0{{$}}
      8 ; GCN: v_mov_b32_e32 [[VLDSPTR:v[0-9]+]], [[LDSPTR]]
      9 ; GCN: ds_write_b32  [[VLDSPTR]], [[ZERO0]]
     10 define void @stored_fi_to_lds(float* addrspace(3)* %ptr) #0 {
     11   %tmp = alloca float ; the stack slot whose address is captured
     12   store float 4.0, float *%tmp ; initialize the slot
     13   store float* %tmp, float* addrspace(3)* %ptr ; the capture: the slot's address is the stored value
     14   ret void
     15 }
     16 
     17 ; Offset is applied: two stack slots are captured one after the other, and
        ; the value written for the second slot is expected to be 4 rather than 0.
     18 ; GCN-LABEL: {{^}}stored_fi_to_lds_2_small_objects:
     19 ; GCN-DAG: v_mov_b32_e32 [[ZERO:v[0-9]+]], 0{{$}}
     20 ; GCN-DAG: buffer_store_dword v{{[0-9]+}}, [[ZERO]], s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}} offen{{$}}
     21 ; GCN-DAG: buffer_store_dword v{{[0-9]+}}, [[ZERO]], s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}} offen offset:4{{$}}
     22 
     23 ; GCN-DAG: s_load_dword [[LDSPTR:s[0-9]+]]
     24 
     25 ; GCN-DAG: v_mov_b32_e32 [[VLDSPTR:v[0-9]+]], [[LDSPTR]]
     26 ; GCN: ds_write_b32  [[VLDSPTR]], [[ZERO]]
     27 
     28 ; GCN-DAG: v_mov_b32_e32 [[FI1:v[0-9]+]], 4{{$}}
     29 ; GCN: ds_write_b32  [[VLDSPTR]], [[FI1]]
     30 define void @stored_fi_to_lds_2_small_objects(float* addrspace(3)* %ptr) #0 {
     31   %tmp0 = alloca float ; first slot
     32   %tmp1 = alloca float ; second slot
     33   store float 4.0, float* %tmp0
     34   store float 4.0, float* %tmp1
     35   store volatile float* %tmp0, float* addrspace(3)* %ptr ; capture the first slot's address
     36   store volatile float* %tmp1, float* addrspace(3)* %ptr ; capture the second slot's address to the same location
     37   ret void
     38 }
     39 
     40 ; Same frame index is used multiple times in the store: the slot's own
        ; address is both the value being stored and the location stored to, so the
        ; last check expects one register ([[ZERO]]) in both operand positions.
     41 ; GCN-LABEL: {{^}}stored_fi_to_self:
     42 ; GCN-DAG: v_mov_b32_e32 [[K:v[0-9]+]], 0x4d2{{$}}
     43 ; GCN-DAG: v_mov_b32_e32 [[ZERO:v[0-9]+]], 0{{$}}
     44 ; GCN: buffer_store_dword [[K]], [[ZERO]], s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}} offen{{$}}
     45 ; GCN: buffer_store_dword [[ZERO]], [[ZERO]], s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}} offen{{$}}
     46 define void @stored_fi_to_self() #0 {
     47   %tmp = alloca i32* ; pointer-sized slot that will end up holding its own address
     48 
     49   ; Avoid optimizing everything out
     50   store volatile i32* inttoptr (i32 1234 to i32*), i32** %tmp ; 1234 is the 0x4d2 constant checked above
     51   %bitcast = bitcast i32** %tmp to i32* ; retype the slot's address so it can be stored as an i32*
     52   store volatile i32* %bitcast, i32** %tmp ; store the slot's address into the slot itself
     53   ret void
     54 }
     55 
     56 ; GCN-LABEL: {{^}}stored_fi_to_self_offset:
        ; Variant of the self-store case with a 512 x i32 array (512 * 4 = 2048
        ; bytes) allocated ahead of the pointer slot. The checks expect the pointer
        ; slot to be addressed with offset:2048 and the captured address to be
        ; materialized as 0x800 (= 2048).
     57 ; GCN-DAG: v_mov_b32_e32 [[ZERO:v[0-9]+]], 0{{$}}
     58 ; GCN-DAG: v_mov_b32_e32 [[K0:v[0-9]+]], 32{{$}}
     59 ; GCN: buffer_store_dword [[K0]], [[ZERO]], s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}} offen{{$}}
     60 
     61 ; GCN-DAG: v_mov_b32_e32 [[K1:v[0-9]+]], 0x4d2{{$}}
     62 ; GCN: buffer_store_dword [[K1]], [[ZERO]], s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}} offen offset:2048{{$}}
     63 
     64 ; GCN: v_mov_b32_e32 [[OFFSETK:v[0-9]+]], 0x800{{$}}
     65 ; GCN: buffer_store_dword [[OFFSETK]], [[ZERO]], s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}} offen offset:2048{{$}}
     66 define void @stored_fi_to_self_offset() #0 {
     67   %tmp0 = alloca [512 x i32] ; 2048-byte array allocated before the pointer slot
     68   %tmp1 = alloca i32* ; the slot that stores its own address
     69 
     70   ; Avoid optimizing everything out
     71   %tmp0.cast = bitcast [512 x i32]* %tmp0 to i32*
     72   store volatile i32 32, i32* %tmp0.cast ; volatile store to the array; matches the K0 = 32 check
     73 
     74   store volatile i32* inttoptr (i32 1234 to i32*), i32** %tmp1 ; 1234 is the 0x4d2 constant (K1)
     75 
     76   %bitcast = bitcast i32** %tmp1 to i32*
     77   store volatile i32* %bitcast, i32** %tmp1 ; store the slot's address into the slot itself
     78   ret void
     79 }
     80 
     81 ; GCN-LABEL: {{^}}stored_fi_to_fi:
        ; Three pointer-sized stack slots; the addresses of the second and third
        ; are stored into each other. The checks expect the slots to be addressed
        ; at offsets 0, 4 and 8, then the value 4 stored at offset 8 and the
        ; value 8 stored at offset 4.
     82 ; GCN: v_mov_b32_e32 [[ZERO:v[0-9]+]], 0{{$}}
     83 ; GCN: buffer_store_dword v{{[0-9]+}}, [[ZERO]], s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}} offen{{$}}
     84 ; GCN: buffer_store_dword v{{[0-9]+}}, [[ZERO]], s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}} offen offset:4{{$}}
     85 ; GCN: buffer_store_dword v{{[0-9]+}}, [[ZERO]], s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}} offen offset:8{{$}}
     86 
     87 ; GCN: v_mov_b32_e32 [[FI1:v[0-9]+]], 4{{$}}
     88 ; GCN: buffer_store_dword [[FI1]], [[ZERO]], s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}} offen offset:8{{$}}
     89 
     90 ; GCN: v_mov_b32_e32 [[FI2:v[0-9]+]], 8{{$}}
     91 ; GCN: buffer_store_dword [[FI2]], [[ZERO]], s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}} offen offset:4{{$}}
     92 define void @stored_fi_to_fi() #0 {
     93   %tmp0 = alloca i32*
     94   %tmp1 = alloca i32*
     95   %tmp2 = alloca i32*
     96   store volatile i32* inttoptr (i32 1234 to i32*), i32** %tmp0 ; volatile initial stores to all three slots
     97   store volatile i32* inttoptr (i32 5678 to i32*), i32** %tmp1
     98   store volatile i32* inttoptr (i32 9999 to i32*), i32** %tmp2
     99 
    100   %bitcast1 = bitcast i32** %tmp1 to i32* ; address of %tmp1, at offset 4
    101   %bitcast2 = bitcast i32** %tmp2 to i32* ; address of %tmp2, at offset 8
    102 
    103   store volatile i32* %bitcast1, i32** %tmp2 ; store offset 4 at offset 8
    104   store volatile i32* %bitcast2, i32** %tmp1 ; store offset 8 at offset 4
    105   ret void
    106 }
    107 
    108 ; GCN-LABEL: {{^}}stored_fi_to_global:
        ; Captures the address of a single stack slot by storing it through an
        ; addrspace(1) pointer argument. The checks expect the store to the slot,
        ; then a register holding 0 to be the value of the following store.
    109 ; GCN: buffer_store_dword v{{[0-9]+}}, v{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}} offen
    110 ; GCN: v_mov_b32_e32 [[FI:v[0-9]+]], 0{{$}}
    111 ; GCN: buffer_store_dword [[FI]]
    112 define void @stored_fi_to_global(float* addrspace(1)* %ptr) #0 {
    113   %tmp = alloca float ; the stack slot whose address is captured
    114   store float 0.0, float *%tmp ; initialize the slot
    115   store float* %tmp, float* addrspace(1)* %ptr ; the capture: the slot's address is the stored value
    116   ret void
    117 }
    118 
    119 ; Offset is applied: three stack slots are allocated, but only the addresses
        ; of the second and third are stored through the addrspace(1) pointer. The
        ; checks expect those captured values to be 4 and 8.
    120 ; GCN-LABEL: {{^}}stored_fi_to_global_2_small_objects:
    121 ; GCN: buffer_store_dword v{{[0-9]+}}, v{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}} offen
    122 ; GCN: buffer_store_dword v{{[0-9]+}}, v{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}} offen
    123 ; GCN: buffer_store_dword v{{[0-9]+}}, v{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}} offen
    124 
    125 ; GCN: v_mov_b32_e32 [[FI1:v[0-9]+]], 4{{$}}
    126 ; GCN: buffer_store_dword [[FI1]], off, s{{\[[0-9]+:[0-9]+\]}}, 0{{$}}
    127 
    128 ; GCN-DAG: v_mov_b32_e32 [[FI2:v[0-9]+]], 8{{$}}
    129 ; GCN: buffer_store_dword [[FI2]], off, s{{\[[0-9]+:[0-9]+\]}}, 0{{$}}
    130 define void @stored_fi_to_global_2_small_objects(float* addrspace(1)* %ptr) #0 {
    131   %tmp0 = alloca float ; written below, but its address is never stored out
    132   %tmp1 = alloca float
    133   %tmp2 = alloca float
    134   store volatile float 0.0, float *%tmp0 ; three volatile stores, one per slot
    135   store volatile float 0.0, float *%tmp1
    136   store volatile float 0.0, float *%tmp2
    137   store volatile float* %tmp1, float* addrspace(1)* %ptr ; capture the address of %tmp1
    138   store volatile float* %tmp2, float* addrspace(1)* %ptr ; capture the address of %tmp2 to the same location
    139   ret void
    140 }
    141 
    142 ; GCN-LABEL: {{^}}stored_fi_to_global_huge_frame_offset:
        ; Writes the first and last elements of a 4096 x i32 stack array, then
        ; stores the address of element 14 of that array through the addrspace(1)
        ; pointer argument. Constants in the checks below:
        ;   0x3ffc = 4095 * 4  (byte offset of the last element)
        ;   0x3e7  = 999       (value stored to the last element)
        ;   56     = 14 * 4    (byte offset of the captured element)
    143 ; GCN: s_add_i32 [[BASE_1_OFF_0:s[0-9]+]], 0, 0x3ffc
    144 ; GCN: v_mov_b32_e32 [[BASE_0:v[0-9]+]], 0{{$}}
    145 ; GCN: buffer_store_dword [[BASE_0]], v{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}} offen
    146 
    147 ; GCN: v_mov_b32_e32 [[V_BASE_1_OFF_0:v[0-9]+]], [[BASE_1_OFF_0]]
    148 ; GCN: v_mov_b32_e32 [[K:v[0-9]+]], 0x3e7{{$}}
    149 ; GCN: s_add_i32 [[BASE_1_OFF_1:s[0-9]+]], 0, 56
    150 ; GCN: buffer_store_dword [[K]], [[V_BASE_1_OFF_0]], s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}} offen{{$}}
    151 
    152 ; GCN: v_mov_b32_e32 [[V_BASE_1_OFF_1:v[0-9]+]], [[BASE_1_OFF_1]]
    153 ; GCN: buffer_store_dword [[V_BASE_1_OFF_1]], off, s{{\[[0-9]+:[0-9]+\]}}, 0{{$}}
    154 define void @stored_fi_to_global_huge_frame_offset(i32* addrspace(1)* %ptr) #0 {
    155   %tmp0 = alloca [4096 x i32]
    156   %tmp1 = alloca [4096 x i32] ; allocated but not referenced by any instruction below
    157   %gep0.tmp0 = getelementptr [4096 x i32], [4096 x i32]* %tmp0, i32 0, i32 0
    158   store volatile i32 0, i32* %gep0.tmp0 ; first element of %tmp0
    159   %gep1.tmp0 = getelementptr [4096 x i32], [4096 x i32]* %tmp0, i32 0, i32 4095
    160   store volatile i32 999, i32* %gep1.tmp0 ; last element of %tmp0
          ; NOTE(review): despite its name, %gep0.tmp1 is computed from %tmp0, not
          ; %tmp1. The checked constant 56 matches the IR as written; confirm
          ; whether indexing %tmp1 was intended before changing either side.
    161   %gep0.tmp1 = getelementptr [4096 x i32], [4096 x i32]* %tmp0, i32 0, i32 14
    162   store i32* %gep0.tmp1, i32* addrspace(1)* %ptr ; the capture: a stack address is the stored value
    163   ret void
    164 }
    165 
    166 attributes #0 = { nounwind } ; attribute group referenced as #0 by every define in this listing
    167