Home | History | Annotate | Download | only in NVPTX
      1 ; RUN: llc -O2 -tail-dup-size=100 -enable-tail-merge=0 < %s | FileCheck %s
      2 target triple = "nvptx64-nvidia-cuda"  ; compile this module for the triple named here (nvptx64 / nvidia / cuda)
      3 
      4 declare void @foo()                     ; external void() function; called from @call_foo
      5 declare void @llvm.nvvm.barrier0()      ; intrinsic called from @call_syncthreads; the CHECK lines expect bar.sync there
      6 
      7 ; syncthreads shouldn't be duplicated.
      8 ; CHECK: .func call_syncthreads
      9 ; CHECK: bar.sync
     10 ; CHECK-NOT: bar.sync
     11 define void @call_syncthreads(i32* %a, i32* %b, i1 %cond, i1 %cond2) nounwind {  ; two store blocks share one barrier tail
     12   br i1 %cond, label %store_zero, label %store_one  ; entry: %cond selects which constant is stored to %a
     13   br i1 %cond2, label %exit, label %store_zero      ; follows a terminator, so it opens an unnamed block with no predecessors
     14 exit:
     15   ret void
     16 store_zero:
     17   store i32 0, i32* %a
     18   br label %barrier
     19 store_one:
     20   store i32 1, i32* %a
     21   br label %barrier
     22 barrier:                                            ; common tail of both store blocks
     23   call void @llvm.nvvm.barrier0()                   ; the CHECK/CHECK-NOT pair above allows only one bar.sync
     24   br label %exit
     25 }
     26 
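     27 ; Check that the same CFG as call_syncthreads really does trigger tail duplication when the shared block calls an ordinary function (call_foo): two calls are expected.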
     28 ; CHECK: .func call_foo
     29 ; CHECK: call
     30 ; CHECK: call
     31 define void @call_foo(i32* %a, i32* %b, i1 %cond, i1 %cond2) nounwind {  ; same CFG as @call_syncthreads, but the tail calls @foo
     32   br i1 %cond, label %store_zero, label %store_one  ; entry: %cond selects which constant is stored to %a
     33   br i1 %cond2, label %exit, label %store_zero      ; follows a terminator, so it opens an unnamed block with no predecessors
     34 exit:
     35   ret void
     36 store_zero:
     37   store i32 0, i32* %a
     38   br label %shared_call
     39 store_one:
     40   store i32 1, i32* %a
     41   br label %shared_call
     42 shared_call:                                        ; common tail of both store blocks
     43   call void @foo()                                  ; the two CHECK: call lines above expect this call to appear twice
     44   br label %exit
     45 }
     46