; RUN: llc < %s -march=nvptx64 -mcpu=sm_30 | FileCheck %s

; Lowering test for @llvm.nvvm.shfl.sync.down.i32 on nvptx64.
;
; Every function below calls the intrinsic as (mask, a, b, c) and expects a
; single PTX instruction of the form
;     shfl.sync.down.b32 out, a, b, c, mask;
; i.e. the first intrinsic argument is printed as the last PTX operand.
;
; %a is always taken from a function parameter. Each of the remaining three
; operands is exercised both as a register (r) and as an immediate (i); the
; three-letter suffix of a function name spells that choice in the order
; mask, b, c. All eight combinations are covered.
;
; The FileCheck directives for a function are grouped directly above its
; definition; their relative order is what matters, not their position
; inside or outside the function body.

; Only the i32 "down" variant is called by the functions in this part of the
; file; the other seven declarations are not referenced here.
declare i32 @llvm.nvvm.shfl.sync.down.i32(i32, i32, i32, i32)
declare float @llvm.nvvm.shfl.sync.down.f32(float, i32, i32, i32)
declare i32 @llvm.nvvm.shfl.sync.up.i32(i32, i32, i32, i32)
declare float @llvm.nvvm.shfl.sync.up.f32(float, i32, i32, i32)
declare i32 @llvm.nvvm.shfl.sync.bfly.i32(i32, i32, i32, i32)
declare float @llvm.nvvm.shfl.sync.bfly.f32(float, i32, i32, i32)
declare i32 @llvm.nvvm.shfl.sync.idx.i32(i32, i32, i32, i32)
declare float @llvm.nvvm.shfl.sync.idx.f32(float, i32, i32, i32)

; mask = register, b = register, c = register.
; All four values are loaded from parameters and forwarded unchanged.
; CHECK-LABEL: .func{{.*}}shfl.sync.rrr
; CHECK: ld.param.u32 [[MASK:%r[0-9]+]]
; CHECK: ld.param.u32 [[A:%r[0-9]+]]
; CHECK: ld.param.u32 [[B:%r[0-9]+]]
; CHECK: ld.param.u32 [[C:%r[0-9]+]]
; CHECK: shfl.sync.down.b32 [[OUT:%r[0-9]+]], [[A]], [[B]], [[C]], [[MASK]];
; CHECK: st.param.{{.}}32 {{.*}}, [[OUT]]
define i32 @shfl.sync.rrr(i32 %mask, i32 %a, i32 %b, i32 %c) {
  %shuffled = call i32 @llvm.nvvm.shfl.sync.down.i32(i32 %mask, i32 %a, i32 %b, i32 %c)
  ret i32 %shuffled
}

; mask = immediate 1, b = register, c = register.
; CHECK-LABEL: .func{{.*}}shfl.sync.irr
; CHECK: ld.param.u32 [[A:%r[0-9]+]]
; CHECK: ld.param.u32 [[B:%r[0-9]+]]
; CHECK: ld.param.u32 [[C:%r[0-9]+]]
; CHECK: shfl.sync.down.b32 [[OUT:%r[0-9]+]], [[A]], [[B]], [[C]], 1;
; CHECK: st.param.{{.}}32 {{.*}}, [[OUT]]
define i32 @shfl.sync.irr(i32 %a, i32 %b, i32 %c) {
  %shuffled = call i32 @llvm.nvvm.shfl.sync.down.i32(i32 1, i32 %a, i32 %b, i32 %c)
  ret i32 %shuffled
}

; mask = register, b = register, c = immediate 1.
; CHECK-LABEL: .func{{.*}}shfl.sync.rri
; CHECK: ld.param.u32 [[MASK:%r[0-9]+]]
; CHECK: ld.param.u32 [[A:%r[0-9]+]]
; CHECK: ld.param.u32 [[B:%r[0-9]+]]
; CHECK: shfl.sync.down.b32 [[OUT:%r[0-9]+]], [[A]], [[B]], 1, [[MASK]];
; CHECK: st.param.{{.}}32 {{.*}}, [[OUT]]
define i32 @shfl.sync.rri(i32 %mask, i32 %a, i32 %b) {
  %shuffled = call i32 @llvm.nvvm.shfl.sync.down.i32(i32 %mask, i32 %a, i32 %b, i32 1)
  ret i32 %shuffled
}

; mask = immediate 1, b = register, c = immediate 2.
; Distinct constants make a swapped operand order visible in the output.
; CHECK-LABEL: .func{{.*}}shfl.sync.iri
; CHECK: ld.param.u32 [[A:%r[0-9]+]]
; CHECK: ld.param.u32 [[B:%r[0-9]+]]
; CHECK: shfl.sync.down.b32 [[OUT:%r[0-9]+]], [[A]], [[B]], 2, 1;
; CHECK: st.param.{{.}}32 {{.*}}, [[OUT]]
define i32 @shfl.sync.iri(i32 %a, i32 %b) {
  %shuffled = call i32 @llvm.nvvm.shfl.sync.down.i32(i32 1, i32 %a, i32 %b, i32 2)
  ret i32 %shuffled
}

; mask = register, b = immediate 1, c = register.
; CHECK-LABEL: .func{{.*}}shfl.sync.rir
; CHECK: ld.param.u32 [[MASK:%r[0-9]+]]
; CHECK: ld.param.u32 [[A:%r[0-9]+]]
; CHECK: ld.param.u32 [[C:%r[0-9]+]]
; CHECK: shfl.sync.down.b32 [[OUT:%r[0-9]+]], [[A]], 1, [[C]], [[MASK]];
; CHECK: st.param.{{.}}32 {{.*}}, [[OUT]]
define i32 @shfl.sync.rir(i32 %mask, i32 %a, i32 %c) {
  %shuffled = call i32 @llvm.nvvm.shfl.sync.down.i32(i32 %mask, i32 %a, i32 1, i32 %c)
  ret i32 %shuffled
}

; mask = immediate 1, b = immediate 2, c = register.
; CHECK-LABEL: .func{{.*}}shfl.sync.iir
; CHECK: ld.param.u32 [[A:%r[0-9]+]]
; CHECK: ld.param.u32 [[C:%r[0-9]+]]
; CHECK: shfl.sync.down.b32 [[OUT:%r[0-9]+]], [[A]], 2, [[C]], 1;
; CHECK: st.param.{{.}}32 {{.*}}, [[OUT]]
define i32 @shfl.sync.iir(i32 %a, i32 %c) {
  %shuffled = call i32 @llvm.nvvm.shfl.sync.down.i32(i32 1, i32 %a, i32 2, i32 %c)
  ret i32 %shuffled
}

; mask = register, b = immediate 1, c = immediate 2.
; CHECK-LABEL: .func{{.*}}shfl.sync.rii
; CHECK: ld.param.u32 [[MASK:%r[0-9]+]]
; CHECK: ld.param.u32 [[A:%r[0-9]+]]
; CHECK: shfl.sync.down.b32 [[OUT:%r[0-9]+]], [[A]], 1, 2, [[MASK]];
; CHECK: st.param.{{.}}32 {{.*}}, [[OUT]]
define i32 @shfl.sync.rii(i32 %mask, i32 %a) {
  %shuffled = call i32 @llvm.nvvm.shfl.sync.down.i32(i32 %mask, i32 %a, i32 1, i32 2)
  ret i32 %shuffled
}

; mask = immediate 1, b = immediate 2, c = immediate 3.
; NOTE(review): %b is accepted but never used by the body; it is kept so the
; function signature stays exactly as it was.
; CHECK-LABEL: .func{{.*}}shfl.sync.iii
; CHECK: ld.param.u32 [[A:%r[0-9]+]]
; CHECK: shfl.sync.down.b32 [[OUT:%r[0-9]+]], [[A]], 2, 3, 1;
; CHECK: st.param.{{.}}32 {{.*}}, [[OUT]]
define i32 @shfl.sync.iii(i32 %a, i32 %b) {
  %shuffled = call i32 @llvm.nvvm.shfl.sync.down.i32(i32 1, i32 %a, i32 2, i32 3)
  ret i32 %shuffled
}