Home | History | Annotate | Download | only in AMDGPU
      1 ; RUN: llc -march=amdgcn -mcpu=hawaii -verify-machineinstrs -mattr=-promote-alloca < %s | FileCheck -check-prefix=GCN %s
      2 ; XFAIL: *
      3 
      4 @sPrivateStorage = external addrspace(3) global [256 x [8 x <4 x i64>]]
      5 
      6 ; GCN-LABEL: {{^}}ds_reorder_vector_split:
      7 
      8 ; Write zeroinitializer
      9 ; GCN-DAG: ds_write_b64 [[PTR:v[0-9]+]], [[VAL:v\[[0-9]+:[0-9]+\]]] offset:24
     10 ; GCN-DAG: ds_write_b64 [[PTR]], [[VAL]] offset:16
     11 ; GCN-DAG: ds_write_b64 [[PTR]], [[VAL]] offset:8
     12 ; GCN-DAG: ds_write_b64 [[PTR]], [[VAL]]{{$}}
     13 
     14 ; GCN: s_waitcnt vmcnt
     15 
     16 ; GCN-DAG: ds_write_b64 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}} offset:24
     17 ; GCN-DAG: ds_write_b64 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}} offset:16
     18 ; GCN-DAG: ds_write_b64 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}} offset:8
     19 
     20 ; GCN: s_waitcnt lgkmcnt
     21 
     22 ; GCN-DAG: ds_read_b64 {{v\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}} offset:8
     23 ; GCN-DAG: ds_read_b64 {{v\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}} offset:16
     24 ; GCN-DAG: ds_read_b64 {{v\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}} offset:24
     25 
     26 ; This appears to be a dead store of a vector component.
     27 ; GCN: ds_write_b64 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]$}}
     28 
     29 ; GCN: buffer_store_dwordx2
     30 ; GCN: buffer_store_dwordx2
     31 ; GCN: buffer_store_dwordx2
     32 ; GCN: buffer_store_dwordx2
     33 ; GCN: s_endpgm
     34 define void @ds_reorder_vector_split(<4 x i64> addrspace(1)* nocapture readonly %srcValues, i32 addrspace(1)* nocapture readonly %offsets, <4 x i64> addrspace(1)* nocapture %destBuffer, i32 %alignmentOffset) #0 { ; Test kernel: mixes whole-vector stores and per-i64 loads on @sPrivateStorage (addrspace(3)); the GCN check lines above describe the expected ds_write_b64/ds_read_b64 sequence.
     35 entry:
     36   %tmp = tail call i32 @llvm.r600.read.local.size.y() ; local size, Y dimension
     37   %tmp1 = tail call i32 @llvm.r600.read.local.size.z() ; local size, Z dimension
     38   %tmp2 = tail call i32 @llvm.amdgcn.workitem.id.x()
     39   %tmp3 = tail call i32 @llvm.amdgcn.workitem.id.y()
     40   %tmp4 = tail call i32 @llvm.amdgcn.workitem.id.z()
     41   %tmp6 = mul i32 %tmp2, %tmp ; id.x * size.y
     42   %tmp10 = add i32 %tmp3, %tmp6 ; ... + id.y
     43   %tmp11 = mul i32 %tmp10, %tmp1 ; ... * size.z
     44   %tmp9 = add i32 %tmp11, %tmp4 ; %tmp9 = (id.x * size.y + id.y) * size.z + id.z; used as the outer array index below
     45   %x.i.i = tail call i32 @llvm.amdgcn.workgroup.id.x() #1
     46   %x.i.12.i = tail call i32 @llvm.r600.read.local.size.x() #1
     47   %mul.26.i = mul i32 %x.i.12.i, %x.i.i ; local.size.x * workgroup.id.x
     48   %add.i = add i32 %tmp2, %mul.26.i ; %add.i = workitem.id.x + local.size.x * workgroup.id.x
     49   %arrayidx = getelementptr [256 x [8 x <4 x i64>]], [256 x [8 x <4 x i64>]] addrspace(3)* @sPrivateStorage, i32 0, i32 %tmp9, i32 %add.i ; &sPrivateStorage[%tmp9][%add.i]
     50   store <4 x i64> zeroinitializer, <4 x i64> addrspace(3)* %arrayidx ; zero the whole <4 x i64> slot (the "Write zeroinitializer" checks)
     51   %tmp12 = sext i32 %add.i to i64
     52   %arrayidx1 = getelementptr inbounds <4 x i64>, <4 x i64> addrspace(1)* %srcValues, i64 %tmp12
     53   %tmp13 = load <4 x i64>, <4 x i64> addrspace(1)* %arrayidx1 ; %tmp13 = srcValues[%add.i]
     54   %arrayidx2 = getelementptr inbounds i32, i32 addrspace(1)* %offsets, i64 %tmp12
     55   %tmp14 = load i32, i32 addrspace(1)* %arrayidx2 ; %tmp14 = offsets[%add.i]
     56   %add.ptr = getelementptr [256 x [8 x <4 x i64>]], [256 x [8 x <4 x i64>]] addrspace(3)* @sPrivateStorage, i32 0, i32 %tmp9, i32 0, i32 %alignmentOffset ; i64 pointer: &sPrivateStorage[%tmp9][0][%alignmentOffset]
     57   %mul.i = shl i32 %tmp14, 2 ; %tmp14 * 4, i.e. the number of i64 elements in %tmp14 vectors
     58   %arrayidx.i = getelementptr inbounds i64, i64 addrspace(3)* %add.ptr, i32 %mul.i ; advance %add.ptr by %mul.i i64 elements
     59   %tmp15 = bitcast i64 addrspace(3)* %arrayidx.i to <4 x i64> addrspace(3)* ; reinterpret the i64 pointer as a vector pointer
     60   store <4 x i64> %tmp13, <4 x i64> addrspace(3)* %tmp15 ; store the loaded vector through that pointer
     61   %add.ptr6 = getelementptr [256 x [8 x <4 x i64>]], [256 x [8 x <4 x i64>]] addrspace(3)* @sPrivateStorage, i32 0, i32 %tmp9, i32 %tmp14, i32 %alignmentOffset ; &sPrivateStorage[%tmp9][%tmp14][%alignmentOffset]; NOTE(review): appears to be the same address as %arrayidx.i (barring wrap in the shl), reached through a differently shaped GEP - confirm
     62   %tmp16 = sext i32 %tmp14 to i64
     63   %tmp17 = sext i32 %alignmentOffset to i64
     64   %add.ptr9 = getelementptr inbounds <4 x i64>, <4 x i64> addrspace(1)* %destBuffer, i64 %tmp16, i64 %tmp17 ; i64 pointer: element %alignmentOffset of destBuffer[%tmp14]
     65   %tmp18 = bitcast <4 x i64> %tmp13 to i256
     66   %trunc = trunc i256 %tmp18 to i64 ; low 64 bits of the loaded vector
     67   store i64 %trunc, i64 addrspace(1)* %add.ptr9 ; written to the destination directly from the loaded value, not via an addrspace(3) load
     68   %arrayidx10.1 = getelementptr inbounds i64, i64 addrspace(3)* %add.ptr6, i32 1
     69   %tmp19 = load i64, i64 addrspace(3)* %arrayidx10.1 ; i64 at %add.ptr6 + 1 (8 bytes past %add.ptr6)
     70   %arrayidx11.1 = getelementptr inbounds i64, i64 addrspace(1)* %add.ptr9, i64 1
     71   store i64 %tmp19, i64 addrspace(1)* %arrayidx11.1 ; copied to %add.ptr9 + 1
     72   %arrayidx10.2 = getelementptr inbounds i64, i64 addrspace(3)* %add.ptr6, i32 2
     73   %tmp20 = load i64, i64 addrspace(3)* %arrayidx10.2 ; i64 at %add.ptr6 + 2 (16 bytes past %add.ptr6)
     74   %arrayidx11.2 = getelementptr inbounds i64, i64 addrspace(1)* %add.ptr9, i64 2
     75   store i64 %tmp20, i64 addrspace(1)* %arrayidx11.2 ; copied to %add.ptr9 + 2
     76   %arrayidx10.3 = getelementptr inbounds i64, i64 addrspace(3)* %add.ptr6, i32 3
     77   %tmp21 = load i64, i64 addrspace(3)* %arrayidx10.3 ; i64 at %add.ptr6 + 3 (24 bytes past %add.ptr6)
     78   %arrayidx11.3 = getelementptr inbounds i64, i64 addrspace(1)* %add.ptr9, i64 3
     79   store i64 %tmp21, i64 addrspace(1)* %arrayidx11.3 ; copied to %add.ptr9 + 3
     80   ret void
     81 }
     82 
     83 ; Function Attrs: nounwind readnone
     84 declare i32 @llvm.amdgcn.workgroup.id.x() #1
     85 
     86 ; Function Attrs: nounwind readnone
     87 declare i32 @llvm.r600.read.local.size.x() #1
     88 
     89 ; Function Attrs: nounwind readnone
     90 declare i32 @llvm.amdgcn.workitem.id.x() #1
     91 
     92 ; Function Attrs: nounwind readnone
     93 declare i32 @llvm.r600.read.local.size.y() #1
     94 
     95 ; Function Attrs: nounwind readnone
     96 declare i32 @llvm.r600.read.local.size.z() #1
     97 
     98 ; Function Attrs: nounwind readnone
     99 declare i32 @llvm.amdgcn.workitem.id.y() #1
    100 
    101 ; Function Attrs: nounwind readnone
    102 declare i32 @llvm.amdgcn.workitem.id.z() #1
    103 
    104 attributes #0 = { norecurse nounwind }
    105 attributes #1 = { nounwind readnone }
    106