Home | History | Annotate | Download | only in AMDGPU
      1 ; RUN: opt -S -amdgpu-lower-intrinsics %s | FileCheck -check-prefix=OPT %s
      2 
      3 declare void @llvm.memcpy.p1i8.p1i8.i64(i8 addrspace(1)* nocapture, i8 addrspace(1)* nocapture readonly, i64, i1) #1
      4 declare void @llvm.memcpy.p1i8.p3i8.i32(i8 addrspace(1)* nocapture, i8 addrspace(3)* nocapture readonly, i32, i1) #1
      5 
      6 declare void @llvm.memmove.p1i8.p1i8.i64(i8 addrspace(1)* nocapture, i8 addrspace(1)* nocapture readonly, i64, i1) #1
      7 declare void @llvm.memset.p1i8.i64(i8 addrspace(1)* nocapture, i8, i64, i1) #1
      8 
      9 ; Test the upper bound for static sizes that are left as intrinsic calls rather than expanded
     10 ; OPT-LABEL: @max_size_small_static_memcpy_caller0(
     11 ; OPT: call void @llvm.memcpy.p1i8.p1i8.i64(i8 addrspace(1)* %dst, i8 addrspace(1)* %src, i64 1024, i1 false)
     12 define amdgpu_kernel void @max_size_small_static_memcpy_caller0(i8 addrspace(1)* %dst, i8 addrspace(1)* %src) #0 {
          ; Constant length of 1024 bytes: the check lines for this function
          ; expect this memcpy intrinsic call to still be present, unchanged,
          ; after the -amdgpu-lower-intrinsics run.
     13   call void @llvm.memcpy.p1i8.p1i8.i64(i8 addrspace(1)* %dst, i8 addrspace(1)* %src, i64 1024, i1 false)
     14   ret void
     15 }
     16 
     17 ; Smallest static size which will be expanded
     18 ; OPT-LABEL: @min_size_large_static_memcpy_caller0(
     19 ; OPT-NOT: call
     20 ; OPT: br label %load-store-loop
     21 ; OPT: [[T1:%[0-9]+]] = getelementptr inbounds i8, i8 addrspace(1)* %src, i64 %loop-index
     22 ; OPT-NEXT: [[T2:%[0-9]+]] = load i8, i8 addrspace(1)* [[T1]]
     23 ; OPT-NEXT: [[T3:%[0-9]+]] = getelementptr inbounds i8, i8 addrspace(1)* %dst, i64 %loop-index
     24 ; OPT-NEXT: store i8 [[T2]], i8 addrspace(1)* [[T3]]
     25 ; OPT-NEXT: [[T4:%[0-9]+]] = add i64 %loop-index, 1
     26 ; OPT-NEXT: [[T5:%[0-9]+]] = icmp ult i64 [[T4]], 1025
     27 ; OPT-NEXT: br i1 [[T5]], label %load-store-loop, label %memcpy-split
     28 define amdgpu_kernel void @min_size_large_static_memcpy_caller0(i8 addrspace(1)* %dst, i8 addrspace(1)* %src) #0 {
          ; Constant length of 1025 bytes, one more than the 1024-byte case that
          ; is expected to be kept. The check lines expect no call to remain and,
          ; in its place, a %load-store-loop block that copies one i8 per
          ; iteration and exits to %memcpy-split once the index reaches 1025.
     29   call void @llvm.memcpy.p1i8.p1i8.i64(i8 addrspace(1)* %dst, i8 addrspace(1)* %src, i64 1025, i1 false)
     30   ret void
     31 }
     32 
     33 ; OPT-LABEL: @max_size_small_static_memmove_caller0(
     34 ; OPT: call void @llvm.memmove.p1i8.p1i8.i64(i8 addrspace(1)* %dst, i8 addrspace(1)* %src, i64 1024, i1 false)
     35 define amdgpu_kernel void @max_size_small_static_memmove_caller0(i8 addrspace(1)* %dst, i8 addrspace(1)* %src) #0 {
          ; Constant length of 1024 bytes: the check line for this function
          ; expects the memmove intrinsic call to be left in place unchanged.
     36   call void @llvm.memmove.p1i8.p1i8.i64(i8 addrspace(1)* %dst, i8 addrspace(1)* %src, i64 1024, i1 false)
     37   ret void
     38 }
     39 
     40 ; OPT-LABEL: @min_size_large_static_memmove_caller0(
     41 ; OPT-NOT: call
     42 ; OPT: getelementptr
     43 ; OPT-NEXT: load i8
     44 ; OPT: getelementptr
     45 ; OPT-NEXT: store i8
     46 define amdgpu_kernel void @min_size_large_static_memmove_caller0(i8 addrspace(1)* %dst, i8 addrspace(1)* %src) #0 {
          ; Constant length of 1025 bytes: the check lines expect no call before
          ; the first getelementptr, then a getelementptr/load i8 pair and a
          ; getelementptr/store i8 pair, i.e. the memmove has been expanded.
     47   call void @llvm.memmove.p1i8.p1i8.i64(i8 addrspace(1)* %dst, i8 addrspace(1)* %src, i64 1025, i1 false)
     48   ret void
     49 }
     50 
     51 ; OPT-LABEL: @max_size_small_static_memset_caller0(
     52 ; OPT: call void @llvm.memset.p1i8.i64(i8 addrspace(1)* %dst, i8 %val, i64 1024, i1 false)
     53 define amdgpu_kernel void @max_size_small_static_memset_caller0(i8 addrspace(1)* %dst, i8 %val) #0 {
          ; Constant length of 1024 bytes with a runtime fill value %val: the
          ; check line expects the memset intrinsic call to be left unchanged.
     54   call void @llvm.memset.p1i8.i64(i8 addrspace(1)* %dst, i8 %val, i64 1024, i1 false)
     55   ret void
     56 }
     57 
     58 ; OPT-LABEL: @min_size_large_static_memset_caller0(
     59 ; OPT-NOT: call
     60 ; OPT: getelementptr
     61 ; OPT: store i8
     62 define amdgpu_kernel void @min_size_large_static_memset_caller0(i8 addrspace(1)* %dst, i8 %val) #0 {
          ; Constant length of 1025 bytes: the check lines expect no call before
          ; a getelementptr that is later followed by an i8 store, i.e. the
          ; memset has been expanded.
     63   call void @llvm.memset.p1i8.i64(i8 addrspace(1)* %dst, i8 %val, i64 1025, i1 false)
     64   ret void
     65 }
     66 
     67 ; OPT-LABEL: @variable_memcpy_caller0(
     68 ; OPT-NOT: call
     69 ; OPT: phi
     70 define amdgpu_kernel void @variable_memcpy_caller0(i8 addrspace(1)* %dst, i8 addrspace(1)* %src, i64 %n) #0 {
          ; Length %n is a kernel argument, not a constant. The check lines
          ; expect a phi to appear with no call before it (presumably the index
          ; of the expansion loop; the checks do not pin its shape).
     71   call void @llvm.memcpy.p1i8.p1i8.i64(i8 addrspace(1)* %dst, i8 addrspace(1)* %src, i64 %n, i1 false)
     72   ret void
     73 }
     74 
     75 ; OPT-LABEL: @variable_memcpy_caller1(
     76 ; OPT-NOT: call
     77 ; OPT: phi
     78 define amdgpu_kernel void @variable_memcpy_caller1(i8 addrspace(1)* %dst, i8 addrspace(1)* %src, i64 %n) #0 {
          ; Same body and same expectations as @variable_memcpy_caller0: a phi
          ; with no call before it.
          ; NOTE(review): presumably present so that the same intrinsic
          ; declaration has users in more than one function; confirm intent.
     79   call void @llvm.memcpy.p1i8.p1i8.i64(i8 addrspace(1)* %dst, i8 addrspace(1)* %src, i64 %n, i1 false)
     80   ret void
     81 }
     82 
     83 ; OPT-LABEL: @memcpy_multi_use_one_function(
     84 ; OPT-NOT: call
     85 ; OPT: phi
     86 ; OPT-NOT: call
     87 ; OPT: phi
     88 ; OPT-NOT: call
     89 define amdgpu_kernel void @memcpy_multi_use_one_function(i8 addrspace(1)* %dst0, i8 addrspace(1)* %dst1, i8 addrspace(1)* %src, i64 %n, i64 %m) #0 {
          ; Two variable-length copies from the same %src inside one function.
          ; The check lines expect two phis and no call before, between, or
          ; after them, so both uses must be expanded.
          ; First copy: %n bytes into %dst0.
     90   call void @llvm.memcpy.p1i8.p1i8.i64(i8 addrspace(1)* %dst0, i8 addrspace(1)* %src, i64 %n, i1 false)
          ; Second copy: %m bytes into %dst1.
     91   call void @llvm.memcpy.p1i8.p1i8.i64(i8 addrspace(1)* %dst1, i8 addrspace(1)* %src, i64 %m, i1 false)
     92   ret void
     93 }
     94 
     95 ; OPT-LABEL: @memcpy_alt_type(
     96 ; OPT: phi
     97 ; OPT: getelementptr inbounds i8, i8 addrspace(3)*
     98 ; OPT: load i8, i8 addrspace(3)*
     99 ; OPT: getelementptr inbounds i8, i8 addrspace(1)*
    100 ; OPT: store i8
    101 define amdgpu_kernel void @memcpy_alt_type(i8 addrspace(1)* %dst, i8 addrspace(3)* %src, i32 %n) #0 {
          ; Uses the p1i8.p3i8.i32 memcpy variant: addrspace(3) source,
          ; addrspace(1) destination, and an i32 (not i64) variable length.
          ; The check lines expect a phi, an i8 load through an addrspace(3)
          ; getelementptr, and an i8 store through an addrspace(1) getelementptr.
    102   call void @llvm.memcpy.p1i8.p3i8.i32(i8 addrspace(1)* %dst, i8 addrspace(3)* %src, i32 %n, i1 false)
    103   ret void
    104 }
    105 
    106 ; One of the uses in the function should be expanded, the other left alone.
    107 ; OPT-LABEL: @memcpy_multi_use_one_function_keep_small(
    108 ; OPT: getelementptr inbounds i8, i8 addrspace(1)*
    109 ; OPT: load i8, i8 addrspace(1)*
    110 ; OPT: getelementptr inbounds i8, i8 addrspace(1)*
    111 ; OPT: store i8
    112 
    113 ; OPT: call void @llvm.memcpy.p1i8.p1i8.i64(i8 addrspace(1)* %dst1, i8 addrspace(1)* %src, i64 102, i1 false)
    114 define amdgpu_kernel void @memcpy_multi_use_one_function_keep_small(i8 addrspace(1)* %dst0, i8 addrspace(1)* %dst1, i8 addrspace(1)* %src, i64 %n) #0 {
          ; Variable length %n: the check lines expect i8 load/store
          ; getelementptr code for this copy.
    115   call void @llvm.memcpy.p1i8.p1i8.i64(i8 addrspace(1)* %dst0, i8 addrspace(1)* %src, i64 %n, i1 false)
          ; Constant length of 102 bytes: the final check line expects this
          ; call to remain in the output unchanged.
    116   call void @llvm.memcpy.p1i8.p1i8.i64(i8 addrspace(1)* %dst1, i8 addrspace(1)* %src, i64 102, i1 false)
    117   ret void
    118 }
    119 
    120 attributes #0 = { nounwind }
    121 attributes #1 = { argmemonly nounwind }
    122