; RUN: llc -O2 -tail-dup-size=100 -enable-tail-merge=0 < %s | FileCheck %s
;
; Tail-duplication test for the NVPTX backend. Both functions below share the
; same control-flow graph: two blocks (L1 and L2) that each store to %a and
; then branch to a common tail block (L42), which makes a call and branches to
; Ret. The only difference is the callee in the tail block:
;   * @call_syncthreads calls @llvm.nvvm.barrier0, and the output must contain
;     exactly one bar.sync, i.e. the tail block must not be duplicated.
;   * @call_foo calls the ordinary function @foo, and the output must contain
;     two calls, i.e. the tail block is duplicated into its predecessors.
; -tail-dup-size=100 raises the tail-duplication size threshold, and
; -enable-tail-merge=0 turns tail merging off so it cannot fold duplicated
; tails back together.
target triple = "nvptx64-nvidia-cuda"

declare void @foo()
declare void @llvm.nvvm.barrier0()

; syncthreads shouldn't be duplicated: expect one bar.sync and no second one
; before the next function starts.
; CHECK-LABEL: .func call_syncthreads
; CHECK: bar.sync
; CHECK-NOT: bar.sync
define void @call_syncthreads(i32* %a, i32* %b, i1 %cond, i1 %cond2) nounwind {
  br i1 %cond, label %L1, label %L2
  ; The branch above terminates the entry block, so the next branch starts a
  ; new unnamed block. Nothing in this function branches to it.
  br i1 %cond2, label %Ret, label %L1
Ret:
  ret void
L1:
  store i32 0, i32* %a
  br label %L42
L2:
  store i32 1, i32* %a
  br label %L42
L42:
  ; Common tail reached from both L1 and L2; this is the block that must stay
  ; unique in the generated code.
  call void @llvm.nvvm.barrier0()
  br label %Ret
}

; Check that the CFG used in call_syncthreads really does trigger tail
; duplication when the tail block calls an ordinary function instead of the
; barrier: the single IR call must show up twice in the output.
; CHECK-LABEL: .func call_foo
; CHECK: call
; CHECK: call
define void @call_foo(i32* %a, i32* %b, i1 %cond, i1 %cond2) nounwind {
  br i1 %cond, label %L1, label %L2
  ; As in call_syncthreads: a separate unnamed block that nothing branches to.
  br i1 %cond2, label %Ret, label %L1
Ret:
  ret void
L1:
  store i32 0, i32* %a
  br label %L42
L2:
  store i32 1, i32* %a
  br label %L42
L42:
  ; Common tail reached from both L1 and L2; expected to be duplicated.
  call void @foo()
  br label %Ret
}