; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s

; Codegen tests for extractelement on AMDGPU. The first three kernels extract
; element 0 from a 64-bit value that was bitcast to a two-element vector; the
; last three extract an element from a volatile <4 x i32> load.

declare i32 @llvm.amdgcn.workitem.id.x() nounwind readnone

; Make sure the add and load are reduced to 32 bits even with the
; bitcast to vector.
; GCN-LABEL: {{^}}bitcast_int_to_vector_extract_0:
; GCN-DAG: s_load_dword [[B:s[0-9]+]]
; GCN-DAG: buffer_load_dword [[A:v[0-9]+]]
; GCN: v_add_i32_e32 [[ADD:v[0-9]+]], vcc, [[B]], [[A]]
; GCN: buffer_store_dword [[ADD]]
define amdgpu_kernel void @bitcast_int_to_vector_extract_0(i32 addrspace(1)* %out, i64 addrspace(1)* %in, i64 %b) {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %gep = getelementptr i64, i64 addrspace(1)* %in, i32 %tid
  %a = load i64, i64 addrspace(1)* %gep
  %add = add i64 %a, %b
  %val.bc = bitcast i64 %add to <2 x i32>
  %extract = extractelement <2 x i32> %val.bc, i32 0
  store i32 %extract, i32 addrspace(1)* %out
  ret void
}

; Floating-point variant: the value is produced by a double fadd before being
; bitcast to <2 x i32>. The checks expect the 64-bit load and v_add_f64 to
; remain, followed by a single-dword store of the extracted element.
; GCN-LABEL: {{^}}bitcast_fp_to_vector_extract_0:
; GCN: buffer_load_dwordx2
; GCN: v_add_f64
; GCN: buffer_store_dword v
define amdgpu_kernel void @bitcast_fp_to_vector_extract_0(i32 addrspace(1)* %out, double addrspace(1)* %in, double %b) {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %gep = getelementptr double, double addrspace(1)* %in, i32 %tid
  %a = load double, double addrspace(1)* %gep
  %add = fadd double %a, %b
  %val.bc = bitcast double %add to <2 x i32>
  %extract = extractelement <2 x i32> %val.bc, i32 0
  store i32 %extract, i32 addrspace(1)* %out
  ret void
}

; Integer add whose result is bitcast to a float vector (<2 x float>) before
; element 0 is extracted and stored. The checks expect a 64-bit load, an
; integer add, and a dword store.
; GCN-LABEL: {{^}}bitcast_int_to_fpvector_extract_0:
; GCN: buffer_load_dwordx2
; GCN: v_add_i32
; GCN: buffer_store_dword
define amdgpu_kernel void @bitcast_int_to_fpvector_extract_0(float addrspace(1)* %out, i64 addrspace(1)* %in, i64 %b) {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %gep = getelementptr i64, i64 addrspace(1)* %in, i32 %tid
  %a = load i64, i64 addrspace(1)* %gep
  %add = add i64 %a, %b
  %val.bc = bitcast i64 %add to <2 x float>
  %extract = extractelement <2 x float> %val.bc, i32 0
  store float %extract, float addrspace(1)* %out
  ret void
}

; Extract element 0 from a volatile <4 x i32> load. The checks expect the
; full four-dword load to be emitted rather than a narrower one.
; GCN-LABEL: {{^}}no_extract_volatile_load_extract0:
; GCN: buffer_load_dwordx4
; GCN: buffer_store_dword v
define amdgpu_kernel void @no_extract_volatile_load_extract0(i32 addrspace(1)* %out, <4 x i32> addrspace(1)* %in) {
entry:
  %vec = load volatile <4 x i32>, <4 x i32> addrspace(1)* %in
  %elt0 = extractelement <4 x i32> %vec, i32 0
  store i32 %elt0, i32 addrspace(1)* %out
  ret void
}

; Same as above, but extracting element 2 (a non-zero constant index).
; GCN-LABEL: {{^}}no_extract_volatile_load_extract2:
; GCN: buffer_load_dwordx4
; GCN: buffer_store_dword v
define amdgpu_kernel void @no_extract_volatile_load_extract2(i32 addrspace(1)* %out, <4 x i32> addrspace(1)* %in) {
entry:
  %vec = load volatile <4 x i32>, <4 x i32> addrspace(1)* %in
  %elt2 = extractelement <4 x i32> %vec, i32 2
  store i32 %elt2, i32 addrspace(1)* %out
  ret void
}

; Same as above, but the element index is the runtime kernel argument %idx.
; GCN-LABEL: {{^}}no_extract_volatile_load_dynextract:
; GCN: buffer_load_dwordx4
; GCN: buffer_store_dword v
define amdgpu_kernel void @no_extract_volatile_load_dynextract(i32 addrspace(1)* %out, <4 x i32> addrspace(1)* %in, i32 %idx) {
entry:
  %vec = load volatile <4 x i32>, <4 x i32> addrspace(1)* %in
  %eltN = extractelement <4 x i32> %vec, i32 %idx
  store i32 %eltN, i32 addrspace(1)* %out
  ret void
}