Home | History | Annotate | Download | only in AMDGPU
      1 ; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
      2 
      3 declare i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
      4 
      5 ; Make sure the add and load are reduced to 32 bits even with the
      6 ; bitcast to vector.
      7 ; GCN-LABEL: {{^}}bitcast_int_to_vector_extract_0:
      8 ; GCN-DAG: s_load_dword [[B:s[0-9]+]]
      9 ; GCN-DAG: buffer_load_dword [[A:v[0-9]+]]
     10 ; GCN: v_add_i32_e32 [[ADD:v[0-9]+]], vcc, [[B]], [[A]]
     11 ; GCN: buffer_store_dword [[ADD]]
     12 define amdgpu_kernel void @bitcast_int_to_vector_extract_0(i32 addrspace(1)* %out, i64 addrspace(1)* %in, i64 %b) {
           ; Loads the i64 at %in[workitem.id.x], adds %b, reinterprets the sum as
           ; <2 x i32>, and stores only element 0 to %out.
     13    %tid = call i32 @llvm.amdgcn.workitem.id.x()
     14    %gep = getelementptr i64, i64 addrspace(1)* %in, i32 %tid 
     15    %a = load i64, i64 addrspace(1)* %gep
     16    %add = add i64 %a, %b
           ; The add above is 64-bit in the IR, but only element 0 of the bitcast
           ; result is used; the checks for this function expect a single-dword
           ; load and a 32-bit add.
     17    %val.bc = bitcast i64 %add to <2 x i32>
     18    %extract = extractelement <2 x i32> %val.bc, i32 0
     19    store i32 %extract, i32 addrspace(1)* %out
     20    ret void
     21 }
     22 
     23 ; GCN-LABEL: {{^}}bitcast_fp_to_vector_extract_0:
     24 ; GCN: buffer_load_dwordx2
     25 ; GCN: v_add_f64
     26 ; GCN: buffer_store_dword v
     27 define amdgpu_kernel void @bitcast_fp_to_vector_extract_0(i32 addrspace(1)* %out, double addrspace(1)* %in, double %b) {
           ; Floating-point variant: loads the double at %in[workitem.id.x], adds %b
           ; with fadd, reinterprets the sum as <2 x i32>, and stores element 0 to %out.
     28    %tid = call i32 @llvm.amdgcn.workitem.id.x()
     29    %gep = getelementptr double, double addrspace(1)* %in, i32 %tid 
     30    %a = load double, double addrspace(1)* %gep
     31    %add = fadd double %a, %b
           ; Only element 0 of the bitcast result is stored; the checks for this
           ; function still expect a dwordx2 load and a v_add_f64 ahead of the
           ; dword store.
           ; NOTE(review): presumably the f64 add cannot be narrowed the way an
           ; integer add can -- confirm against the combine being tested.
     32    %val.bc = bitcast double %add to <2 x i32>
     33    %extract = extractelement <2 x i32> %val.bc, i32 0
     34    store i32 %extract, i32 addrspace(1)* %out
     35    ret void
     36 }
     37 
     38 ; GCN-LABEL: {{^}}bitcast_int_to_fpvector_extract_0:
     39 ; GCN: buffer_load_dwordx2
     40 ; GCN: v_add_i32
     41 ; GCN: buffer_store_dword
     42 define amdgpu_kernel void @bitcast_int_to_fpvector_extract_0(float addrspace(1)* %out, i64 addrspace(1)* %in, i64 %b) {
           ; Loads the i64 at %in[workitem.id.x], adds %b, reinterprets the sum as
           ; <2 x float>, and stores element 0 to %out as a float.
     43    %tid = call i32 @llvm.amdgcn.workitem.id.x()
     44    %gep = getelementptr i64, i64 addrspace(1)* %in, i32 %tid 
     45    %a = load i64, i64 addrspace(1)* %gep
     46    %add = add i64 %a, %b
           ; Integer add feeding a floating-point vector view: the checks for this
           ; function expect a dwordx2 load, a v_add_i32, and a dword store.
     47    %val.bc = bitcast i64 %add to <2 x float>
     48    %extract = extractelement <2 x float> %val.bc, i32 0
     49    store float %extract, float addrspace(1)* %out
     50    ret void
     51 }
     52 
     53 ; GCN-LABEL: {{^}}no_extract_volatile_load_extract0:
     54 ; GCN: buffer_load_dwordx4
     55 ; GCN: buffer_store_dword v
     56 define amdgpu_kernel void @no_extract_volatile_load_extract0(i32 addrspace(1)* %out, <4 x i32> addrspace(1)* %in) {
        ; Volatile load of a <4 x i32> from %in; element 0 (constant index) is
        ; stored to %out. The checks for this function expect the load to remain
        ; a full buffer_load_dwordx4 followed by a single dword store.
     57 entry:
     58   %vec = load volatile <4 x i32>, <4 x i32> addrspace(1)* %in
     59   %elt0 = extractelement <4 x i32> %vec, i32 0
     60   store i32 %elt0, i32 addrspace(1)* %out
     61   ret void
     62 }
     63 
     64 ; GCN-LABEL: {{^}}no_extract_volatile_load_extract2:
     65 ; GCN: buffer_load_dwordx4
     66 ; GCN: buffer_store_dword v
     67 
     68 define amdgpu_kernel void @no_extract_volatile_load_extract2(i32 addrspace(1)* %out, <4 x i32> addrspace(1)* %in) {
        ; Volatile load of a <4 x i32> from %in; element 2 (constant, non-zero
        ; index) is stored to %out. The checks for this function expect the load
        ; to remain a full buffer_load_dwordx4 followed by a single dword store.
     69 entry:
     70   %vec = load volatile <4 x i32>, <4 x i32> addrspace(1)* %in
     71   %elt2 = extractelement <4 x i32> %vec, i32 2
     72   store i32 %elt2, i32 addrspace(1)* %out
     73   ret void
     74 }
     75 
     76 ; GCN-LABEL: {{^}}no_extract_volatile_load_dynextract:
     77 ; GCN: buffer_load_dwordx4
     78 ; GCN: buffer_store_dword v
     79 define amdgpu_kernel void @no_extract_volatile_load_dynextract(i32 addrspace(1)* %out, <4 x i32> addrspace(1)* %in, i32 %idx) {
        ; Volatile load of a <4 x i32> from %in; the element selected by the
        ; kernel argument %idx (not a compile-time constant) is stored to %out.
        ; The checks for this function expect the load to remain a full
        ; buffer_load_dwordx4 followed by a single dword store.
     80 entry:
     81   %vec = load volatile <4 x i32>, <4 x i32> addrspace(1)* %in
     82   %eltN = extractelement <4 x i32> %vec, i32 %idx
     83   store i32 %eltN, i32 addrspace(1)* %out
     84   ret void
     85 }
     86