Home | History | Annotate | Download | only in AMDGPU
      1 ; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
      2 
      3 ; Test that when extracting the same unknown vector index from an
      4 ; insertelement the dynamic indexing is folded away.
      5 
      6 declare i32 @llvm.amdgcn.workitem.id.x() #0 ; AMDGPU intrinsic; each test below sign-extends its result and uses it to index %in and %out
      7 
      8 ; Insert and extract use the same dynamic %idx, so no dynamic indexing (and no vector load) is required.
      9 ; GCN-LABEL: {{^}}extract_insert_same_dynelt_v4i32:
     10 ; GCN: s_load_dword [[VAL:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xd{{$}}
     11 ; GCN-NOT: buffer_load_dword
     12 ; GCN-NOT: [[VAL]]
     13 ; GCN: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[VAL]]
     14 ; GCN-NOT: [[VVAL]]
     15 ; GCN: buffer_store_dword [[VVAL]]
     16 define void @extract_insert_same_dynelt_v4i32(i32 addrspace(1)* %out, <4 x i32> addrspace(1)* %in, i32 %val, i32 %idx) #1 {
     17   %id = call i32 @llvm.amdgcn.workitem.id.x() ; work-item id picks the <4 x i32> in %in and the i32 slot in %out
     18   %id.ext = sext i32 %id to i64
     19   %gep.in = getelementptr inbounds <4 x i32>, <4 x i32> addrspace(1)* %in, i64 %id.ext
     20   %gep.out = getelementptr inbounds i32, i32 addrspace(1)* %out, i64 %id.ext
     21   %vec = load <4 x i32>, <4 x i32> addrspace(1)* %gep.in ; non-volatile; unused once the insert/extract pair folds, so the checks expect no buffer_load_dword
     22   %insert = insertelement <4 x i32> %vec, i32 %val, i32 %idx
     23   %extract = extractelement <4 x i32> %insert, i32 %idx ; same %idx as the insert: expected to fold to %val
     24   store i32 %extract, i32 addrspace(1)* %gep.out ; checks expect the scalar-loaded value copied to a VGPR and stored
     25   ret void
     26 }
     27 
     28 ; GCN-LABEL: {{^}}extract_insert_different_dynelt_v4i32:
     29 ; GCN: buffer_load_dwordx4
     30 ; GCN: v_movreld_b32
     31 ; GCN: v_movrels_b32
     32 ; GCN: buffer_store_dword v
     33 define void @extract_insert_different_dynelt_v4i32(i32 addrspace(1)* %out, <4 x i32> addrspace(1)* %in, i32 %val, i32 %idx0, i32 %idx1) #1 {
     34   %tid = call i32 @llvm.amdgcn.workitem.id.x() ; work-item id selects the input vector and the output slot
     35   %tid.i64 = sext i32 %tid to i64
     36   %src.ptr = getelementptr inbounds <4 x i32>, <4 x i32> addrspace(1)* %in, i64 %tid.i64
     37   %dst.ptr = getelementptr inbounds i32, i32 addrspace(1)* %out, i64 %tid.i64
     38   %loaded = load <4 x i32>, <4 x i32> addrspace(1)* %src.ptr ; checks expect this load to remain (buffer_load_dwordx4)
     39   %updated = insertelement <4 x i32> %loaded, i32 %val, i32 %idx0 ; write at dynamic index %idx0
     40   %picked = extractelement <4 x i32> %updated, i32 %idx1 ; read at a different dynamic index %idx1, so the pair is not expected to fold
     41   store i32 %picked, i32 addrspace(1)* %dst.ptr
     42   ret void
     43 }
     44 
     45 ; GCN-LABEL: {{^}}extract_insert_same_elt2_v4i32:
     46 ; GCN: s_load_dword [[VAL:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xd{{$}}
     47 ; GCN-NOT: buffer_load_dword
     48 ; GCN-NOT: [[VAL]]
     49 ; GCN: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[VAL]]
     50 ; GCN-NOT: [[VVAL]]
     51 ; GCN: buffer_store_dword [[VVAL]]
     52 define void @extract_insert_same_elt2_v4i32(i32 addrspace(1)* %out, <4 x i32> addrspace(1)* %in, i32 %val, i32 %idx) #1 {
     53   %id = call i32 @llvm.amdgcn.workitem.id.x() ; work-item id picks the <4 x i32> in %in and the i32 slot in %out
     54   %id.ext = sext i32 %id to i64
     55   %gep.in = getelementptr inbounds <4 x i32>, <4 x i32> addrspace(1)* %in, i64 %id.ext
     56   %gep.out = getelementptr inbounds i32, i32 addrspace(1)* %out, i64 %id.ext
     57   %vec = load <4 x i32>, <4 x i32> addrspace(1)* %gep.in ; non-volatile; unused once the insert/extract pair folds, so the checks expect no buffer_load_dword
     58   %insert = insertelement <4 x i32> %vec, i32 %val, i32 %idx ; NOTE(review): same dynamic %idx as extract_insert_same_dynelt_v4i32; "elt2" in the name may have meant a constant index - confirm
     59   %extract = extractelement <4 x i32> %insert, i32 %idx ; same %idx as the insert: expected to fold to %val
     60   store i32 %extract, i32 addrspace(1)* %gep.out ; checks expect the scalar-loaded value copied to a VGPR and stored
     61   ret void
     62 }
     63 
     64 ; GCN-LABEL: {{^}}extract_insert_same_dynelt_v4f32:
     65 ; GCN: s_load_dword [[VAL:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xd{{$}}
     66 ; No negative check on buffer_load_dword in this function; the volatile load of %vec below must still be emitted.
     67 ; GCN-NOT: [[VAL]]
     68 ; GCN: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[VAL]]
     69 ; GCN-NOT: [[VVAL]]
     70 ; GCN: buffer_store_dword [[VVAL]]
     71 define void @extract_insert_same_dynelt_v4f32(float addrspace(1)* %out, <4 x float> addrspace(1)* %in, float %val, i32 %idx) #1 {
     72   %id = call i32 @llvm.amdgcn.workitem.id.x() ; work-item id picks the <4 x float> in %in and the float slot in %out
     73   %id.ext = sext i32 %id to i64
     74   %gep.in = getelementptr inbounds <4 x float>, <4 x float> addrspace(1)* %in, i64 %id.ext
     75   %gep.out = getelementptr inbounds float, float addrspace(1)* %out, i64 %id.ext
     76   %vec = load volatile <4 x float>, <4 x float> addrspace(1)* %gep.in ; volatile: cannot be deleted even though the folded result does not use %vec
     77   %insert = insertelement <4 x float> %vec, float %val, i32 %idx
     78   %extract = extractelement <4 x float> %insert, i32 %idx ; same %idx as the insert: expected to fold to %val
     79   store float %extract, float addrspace(1)* %gep.out ; checks expect the scalar-loaded value copied to a VGPR and stored
     80   ret void
     81 }
     82 
     83 attributes #0 = { nounwind readnone } ; group used by the llvm.amdgcn.workitem.id.x declaration
     84 attributes #1 = { nounwind } ; group used by the test functions defined above