; RUN: opt -S -amdgpu-lower-intrinsics %s | FileCheck -check-prefix=OPT %s

; Regression test for the amdgpu-lower-intrinsics pass applied to the
; llvm.memcpy / llvm.memmove / llvm.memset intrinsics.
;
; What the check lines below establish:
;   * a call whose length is the constant 1024 must survive the pass untouched;
;   * a call whose length is the constant 1025 must be replaced by explicit
;     i8 loads/stores (no call may remain);
;   * a memcpy whose length is a runtime value must be replaced as well.
; NOTE(review): the 1024-byte cut-off is presumably a threshold defined by the
; pass or the target; this file only pins the observable boundary.

; Intrinsic declarations used by the tests. The second memcpy variant copies
; from addrspace(3) to addrspace(1) with an i32 length.
declare void @llvm.memcpy.p1i8.p1i8.i64(i8 addrspace(1)* nocapture, i8 addrspace(1)* nocapture readonly, i64, i1) #1
declare void @llvm.memcpy.p1i8.p3i8.i32(i8 addrspace(1)* nocapture, i8 addrspace(3)* nocapture readonly, i32, i1) #1

declare void @llvm.memmove.p1i8.p1i8.i64(i8 addrspace(1)* nocapture, i8 addrspace(1)* nocapture readonly, i64, i1) #1
declare void @llvm.memset.p1i8.i64(i8 addrspace(1)* nocapture, i8, i64, i1) #1

; Test the upper bound for sizes to leave
; A 1024-byte constant-size memcpy is expected to remain a call, with the
; same operands as in the input.
; OPT-LABEL: @max_size_small_static_memcpy_caller0(
; OPT: call void @llvm.memcpy.p1i8.p1i8.i64(i8 addrspace(1)* %dst, i8 addrspace(1)* %src, i64 1024, i1 false)
define amdgpu_kernel void @max_size_small_static_memcpy_caller0(i8 addrspace(1)* %dst, i8 addrspace(1)* %src) #0 {
  call void @llvm.memcpy.p1i8.p1i8.i64(i8 addrspace(1)* %dst, i8 addrspace(1)* %src, i64 1024, i1 false)
  ret void
}

; Smallest static size which will be expanded
; The expected expansion is a byte-wise loop: address %src by %loop-index,
; load one i8, address %dst by the same index, store the byte, increment the
; index and branch back to %load-store-loop while the new index is below 1025
; (unsigned); otherwise control leaves to %memcpy-split. No call instruction
; may appear before the loop.
; OPT-LABEL: @min_size_large_static_memcpy_caller0(
; OPT-NOT: call
; OPT: br label %load-store-loop
; OPT: [[T1:%[0-9]+]] = getelementptr inbounds i8, i8 addrspace(1)* %src, i64 %loop-index
; OPT-NEXT: [[T2:%[0-9]+]] = load i8, i8 addrspace(1)* [[T1]]
; OPT-NEXT: [[T3:%[0-9]+]] = getelementptr inbounds i8, i8 addrspace(1)* %dst, i64 %loop-index
; OPT-NEXT: store i8 [[T2]], i8 addrspace(1)* [[T3]]
; OPT-NEXT: [[T4:%[0-9]+]] = add i64 %loop-index, 1
; OPT-NEXT: [[T5:%[0-9]+]] = icmp ult i64 [[T4]], 1025
; OPT-NEXT: br i1 [[T5]], label %load-store-loop, label %memcpy-split
define amdgpu_kernel void @min_size_large_static_memcpy_caller0(i8 addrspace(1)* %dst, i8 addrspace(1)* %src) #0 {
  call void @llvm.memcpy.p1i8.p1i8.i64(i8 addrspace(1)* %dst, i8 addrspace(1)* %src, i64 1025, i1 false)
  ret void
}

; Same boundary as above, for memmove: the 1024-byte call must be kept as is.
; OPT-LABEL: @max_size_small_static_memmove_caller0(
; OPT: call void @llvm.memmove.p1i8.p1i8.i64(i8 addrspace(1)* %dst, i8 addrspace(1)* %src, i64 1024, i1 false)
define amdgpu_kernel void @max_size_small_static_memmove_caller0(i8 addrspace(1)* %dst, i8 addrspace(1)* %src) #0 {
  call void @llvm.memmove.p1i8.p1i8.i64(i8 addrspace(1)* %dst, i8 addrspace(1)* %src, i64 1024, i1 false)
  ret void
}

; A 1025-byte memmove must be expanded: no call may precede the expansion, and
; an address computation followed directly by an i8 load, then an address
; computation followed directly by an i8 store, must appear. The exact control
; flow of the expansion is intentionally not pinned here.
; OPT-LABEL: @min_size_large_static_memmove_caller0(
; OPT-NOT: call
; OPT: getelementptr
; OPT-NEXT: load i8
; OPT: getelementptr
; OPT-NEXT: store i8
define amdgpu_kernel void @min_size_large_static_memmove_caller0(i8 addrspace(1)* %dst, i8 addrspace(1)* %src) #0 {
  call void @llvm.memmove.p1i8.p1i8.i64(i8 addrspace(1)* %dst, i8 addrspace(1)* %src, i64 1025, i1 false)
  ret void
}

; Same boundary for memset: the 1024-byte call must be kept as is.
; OPT-LABEL: @max_size_small_static_memset_caller0(
; OPT: call void @llvm.memset.p1i8.i64(i8 addrspace(1)* %dst, i8 %val, i64 1024, i1 false)
define amdgpu_kernel void @max_size_small_static_memset_caller0(i8 addrspace(1)* %dst, i8 %val) #0 {
  call void @llvm.memset.p1i8.i64(i8 addrspace(1)* %dst, i8 %val, i64 1024, i1 false)
  ret void
}

; A 1025-byte memset must be expanded: no call before the expansion, and an
; address computation plus an i8 store must appear.
; OPT-LABEL: @min_size_large_static_memset_caller0(
; OPT-NOT: call
; OPT: getelementptr
; OPT: store i8
define amdgpu_kernel void @min_size_large_static_memset_caller0(i8 addrspace(1)* %dst, i8 %val) #0 {
  call void @llvm.memset.p1i8.i64(i8 addrspace(1)* %dst, i8 %val, i64 1025, i1 false)
  ret void
}

; memcpy with a runtime length (%n): the call must be gone and a phi must be
; present in the output (presumably the index of the generated copy loop).
; OPT-LABEL: @variable_memcpy_caller0(
; OPT-NOT: call
; OPT: phi
define amdgpu_kernel void @variable_memcpy_caller0(i8 addrspace(1)* %dst, i8 addrspace(1)* %src, i64 %n) #0 {
  call void @llvm.memcpy.p1i8.p1i8.i64(i8 addrspace(1)* %dst, i8 addrspace(1)* %src, i64 %n, i1 false)
  ret void
}

; Identical body to @variable_memcpy_caller0.
; NOTE(review): presumably present so that the same intrinsic declaration has
; users in more than one function -- confirm against the pass implementation.
; OPT-LABEL: @variable_memcpy_caller1(
; OPT-NOT: call
; OPT: phi
define amdgpu_kernel void @variable_memcpy_caller1(i8 addrspace(1)* %dst, i8 addrspace(1)* %src, i64 %n) #0 {
  call void @llvm.memcpy.p1i8.p1i8.i64(i8 addrspace(1)* %dst, i8 addrspace(1)* %src, i64 %n, i1 false)
  ret void
}

; Two runtime-length memcpy calls in a single function: both must be expanded.
; The checks require two phis with no call before, between, or after them.
; OPT-LABEL: @memcpy_multi_use_one_function(
; OPT-NOT: call
; OPT: phi
; OPT-NOT: call
; OPT: phi
; OPT-NOT: call
define amdgpu_kernel void @memcpy_multi_use_one_function(i8 addrspace(1)* %dst0, i8 addrspace(1)* %dst1, i8 addrspace(1)* %src, i64 %n, i64 %m) #0 {
  call void @llvm.memcpy.p1i8.p1i8.i64(i8 addrspace(1)* %dst0, i8 addrspace(1)* %src, i64 %n, i1 false)
  call void @llvm.memcpy.p1i8.p1i8.i64(i8 addrspace(1)* %dst1, i8 addrspace(1)* %src, i64 %m, i1 false)
  ret void
}

; memcpy variant with differing address spaces and an i32 length: the
; expansion must read through addrspace(3) pointers and compute the
; destination address in addrspace(1).
; OPT-LABEL: @memcpy_alt_type(
; OPT: phi
; OPT: getelementptr inbounds i8, i8 addrspace(3)*
; OPT: load i8, i8 addrspace(3)*
; OPT: getelementptr inbounds i8, i8 addrspace(1)*
; OPT: store i8
define amdgpu_kernel void @memcpy_alt_type(i8 addrspace(1)* %dst, i8 addrspace(3)* %src, i32 %n) #0 {
  call void @llvm.memcpy.p1i8.p3i8.i32(i8 addrspace(1)* %dst, i8 addrspace(3)* %src, i32 %n, i1 false)
  ret void
}

; One of the uses in the function should be expanded, the other left alone.
; The first call has a runtime length and is expected to become loads/stores;
; the second has the constant length 102 and is expected to remain a call
; with unchanged operands.
; OPT-LABEL: @memcpy_multi_use_one_function_keep_small(
; OPT: getelementptr inbounds i8, i8 addrspace(1)*
; OPT: load i8, i8 addrspace(1)*
; OPT: getelementptr inbounds i8, i8 addrspace(1)*
; OPT: store i8

; OPT: call void @llvm.memcpy.p1i8.p1i8.i64(i8 addrspace(1)* %dst1, i8 addrspace(1)* %src, i64 102, i1 false)
define amdgpu_kernel void @memcpy_multi_use_one_function_keep_small(i8 addrspace(1)* %dst0, i8 addrspace(1)* %dst1, i8 addrspace(1)* %src, i64 %n) #0 {
  call void @llvm.memcpy.p1i8.p1i8.i64(i8 addrspace(1)* %dst0, i8 addrspace(1)* %src, i64 %n, i1 false)
  call void @llvm.memcpy.p1i8.p1i8.i64(i8 addrspace(1)* %dst1, i8 addrspace(1)* %src, i64 102, i1 false)
  ret void
}

; #0 is attached to the test kernels, #1 to the intrinsic declarations.
attributes #0 = { nounwind }
attributes #1 = { argmemonly nounwind }