Home | History | Annotate | Download | only in NVPTX
      1 ; RUN: llc < %s -march=nvptx64 -mcpu=sm_30 | FileCheck %s
        ;
        ; The command above compiles this file with llc for nvptx64 / sm_30 and
        ; pipes the generated PTX into FileCheck, which matches it against the
        ; expectations embedded in the functions below.
      2 
        ; Declarations of the sync shuffle intrinsics: four modes (down, up,
        ; bfly, idx), each with an i32 and a float data variant. Every variant
        ; takes four arguments; the first and the last two are always i32, and
        ; the second matches the return type.
        ; Only the down.i32 variant is called by the functions visible in this
        ; file; the remaining declarations are not referenced here.
      3 declare i32 @llvm.nvvm.shfl.sync.down.i32(i32, i32, i32, i32)
      4 declare float @llvm.nvvm.shfl.sync.down.f32(float, i32, i32, i32)
      5 declare i32 @llvm.nvvm.shfl.sync.up.i32(i32, i32, i32, i32)
      6 declare float @llvm.nvvm.shfl.sync.up.f32(float, i32, i32, i32)
      7 declare i32 @llvm.nvvm.shfl.sync.bfly.i32(i32, i32, i32, i32)
      8 declare float @llvm.nvvm.shfl.sync.bfly.f32(float, i32, i32, i32)
      9 declare i32 @llvm.nvvm.shfl.sync.idx.i32(i32, i32, i32, i32)
     10 declare float @llvm.nvvm.shfl.sync.idx.f32(float, i32, i32, i32)
     11 
     12 ; CHECK-LABEL: .func{{.*}}shfl.sync.rrr
        ; Case "rrr": the first, third and fourth intrinsic arguments (%mask,
        ; %b, %c) all come from function parameters, so each one is expected to
        ; be loaded by ld.param and passed to the shuffle as a register.
        ; The expected PTX operand order is: result, a, b, c, mask -- i.e. the
        ; intrinsic's first argument is printed as the last PTX operand.
        ; The final expectation verifies that the shuffle result register is the
        ; value stored by st.param.
        ; The three-letter suffix appears to encode (mask, b, c) as
        ; r = register, i = immediate; this matches every case in this file.
     13 define i32 @shfl.sync.rrr(i32 %mask, i32 %a, i32 %b, i32 %c) {
     14   ; CHECK: ld.param.u32 [[MASK:%r[0-9]+]]
     15   ; CHECK: ld.param.u32 [[A:%r[0-9]+]]
     16   ; CHECK: ld.param.u32 [[B:%r[0-9]+]]
     17   ; CHECK: ld.param.u32 [[C:%r[0-9]+]]
     18   ; CHECK: shfl.sync.down.b32 [[OUT:%r[0-9]+]], [[A]], [[B]], [[C]], [[MASK]];
     19   ; CHECK: st.param.{{.}}32 {{.*}}, [[OUT]]
     20   %val = call i32 @llvm.nvvm.shfl.sync.down.i32(i32 %mask, i32 %a, i32 %b, i32 %c)
     21   ret i32 %val
     22 }
     23 
     24 ; CHECK-LABEL: .func{{.*}}shfl.sync.irr
        ; Case "irr": the first intrinsic argument (the mask) is the literal 1,
        ; while %b and %c are function parameters. The expected shuffle therefore
        ; takes b and c from registers and prints the mask as the immediate 1 in
        ; the last operand position.
     25 define i32 @shfl.sync.irr(i32 %a, i32 %b, i32 %c) {
     26   ; CHECK: ld.param.u32 [[A:%r[0-9]+]]
     27   ; CHECK: ld.param.u32 [[B:%r[0-9]+]]
     28   ; CHECK: ld.param.u32 [[C:%r[0-9]+]]
     29   ; CHECK: shfl.sync.down.b32 [[OUT:%r[0-9]+]], [[A]], [[B]], [[C]], 1;
     30   ; CHECK: st.param.{{.}}32 {{.*}}, [[OUT]]
     31   %val = call i32 @llvm.nvvm.shfl.sync.down.i32(i32 1, i32 %a, i32 %b, i32 %c)
     32   ret i32 %val
     33 }
     34 
     35 ; CHECK-LABEL: .func{{.*}}shfl.sync.rri
        ; Case "rri": %mask and %b are function parameters, and the fourth
        ; intrinsic argument is the literal 1. The expected shuffle takes the
        ; mask and b from registers and prints the fourth argument as the
        ; immediate 1 (the operand just before the mask).
     36 define i32 @shfl.sync.rri(i32 %mask, i32 %a, i32 %b) {
     37   ; CHECK: ld.param.u32 [[MASK:%r[0-9]+]]
     38   ; CHECK: ld.param.u32 [[A:%r[0-9]+]]
     39   ; CHECK: ld.param.u32 [[B:%r[0-9]+]]
     40   ; CHECK: shfl.sync.down.b32 [[OUT:%r[0-9]+]], [[A]], [[B]], 1, [[MASK]];
     41   ; CHECK: st.param.{{.}}32 {{.*}}, [[OUT]]
     42   %val = call i32 @llvm.nvvm.shfl.sync.down.i32(i32 %mask, i32 %a, i32 %b, i32 1)
     43   ret i32 %val
     44 }
     45 
     46 ; CHECK-LABEL: .func{{.*}}shfl.sync.iri
        ; Case "iri": the mask is the literal 1 and the fourth intrinsic argument
        ; is the literal 2; only %b (third argument) is a function parameter.
        ; The expected shuffle takes b from a register and prints 2 and 1 as
        ; immediates in the last two operand positions. Distinct literal values
        ; make it visible which immediate lands in which operand slot.
     47 define i32 @shfl.sync.iri(i32 %a, i32 %b) {
     48   ; CHECK: ld.param.u32 [[A:%r[0-9]+]]
     49   ; CHECK: ld.param.u32 [[B:%r[0-9]+]]
     50   ; CHECK: shfl.sync.down.b32 [[OUT:%r[0-9]+]], [[A]], [[B]], 2, 1;
     51   ; CHECK: st.param.{{.}}32 {{.*}}, [[OUT]]
     52   %val = call i32 @llvm.nvvm.shfl.sync.down.i32(i32 1, i32 %a, i32 %b, i32 2)
     53   ret i32 %val
     54 }
     55 
     56 ; CHECK-LABEL: .func{{.*}}shfl.sync.rir
        ; Case "rir": %mask and %c are function parameters, and the third
        ; intrinsic argument is the literal 1. The expected shuffle prints that
        ; literal as an immediate in the second source-operand position, with c
        ; and the mask taken from registers.
     57 define i32 @shfl.sync.rir(i32 %mask, i32 %a, i32 %c) {
     58   ; CHECK: ld.param.u32 [[MASK:%r[0-9]+]]
     59   ; CHECK: ld.param.u32 [[A:%r[0-9]+]]
     60   ; CHECK: ld.param.u32 [[C:%r[0-9]+]]
     61   ; CHECK: shfl.sync.down.b32 [[OUT:%r[0-9]+]], [[A]], 1, [[C]], [[MASK]];
     62   ; CHECK: st.param.{{.}}32 {{.*}}, [[OUT]]
     63   %val = call i32 @llvm.nvvm.shfl.sync.down.i32(i32 %mask, i32 %a, i32 1, i32 %c)
     64   ret i32 %val
     65 }
     66 
     67 ; CHECK-LABEL: .func{{.*}}shfl.sync.iir
        ; Case "iir": the mask is the literal 1 and the third intrinsic argument
        ; is the literal 2; only %c (fourth argument) is a function parameter.
        ; The expected shuffle prints 2 as an immediate after the data register,
        ; takes c from a register, and prints the mask as the immediate 1.
     68 define i32 @shfl.sync.iir(i32 %a, i32 %c) {
     69   ; CHECK: ld.param.u32 [[A:%r[0-9]+]]
     70   ; CHECK: ld.param.u32 [[C:%r[0-9]+]]
     71   ; CHECK: shfl.sync.down.b32 [[OUT:%r[0-9]+]], [[A]], 2, [[C]], 1;
     72   ; CHECK: st.param.{{.}}32 {{.*}}, [[OUT]]
     73   %val = call i32 @llvm.nvvm.shfl.sync.down.i32(i32 1, i32 %a, i32 2, i32 %c)
     74   ret i32 %val
     75 }
     76 
     77 ; CHECK-LABEL: .func{{.*}}shfl.sync.rii
        ; Case "rii": only %mask is a function parameter; the third and fourth
        ; intrinsic arguments are the literals 1 and 2. The expected shuffle
        ; prints both literals as immediates, in that order, followed by the
        ; mask register.
     78 define i32 @shfl.sync.rii(i32 %mask, i32 %a) {
     79   ; CHECK: ld.param.u32 [[MASK:%r[0-9]+]]
     80   ; CHECK: ld.param.u32 [[A:%r[0-9]+]]
     81   ; CHECK: shfl.sync.down.b32 [[OUT:%r[0-9]+]], [[A]], 1, 2, [[MASK]];
     82   ; CHECK: st.param.{{.}}32 {{.*}}, [[OUT]]
     83   %val = call i32 @llvm.nvvm.shfl.sync.down.i32(i32 %mask, i32 %a, i32 1, i32 2)
     84   ret i32 %val
     85 }
     86 
     87 ; CHECK-LABEL: .func{{.*}}shfl.sync.iii
        ; Case "iii": the mask and the third and fourth intrinsic arguments are
        ; all literals (1, 2 and 3 respectively); only the data value %a is a
        ; function parameter. The expected shuffle prints "2, 3, 1" as
        ; immediates after the data register, so each literal's operand slot can
        ; be told apart.
        ; NOTE(review): parameter %b is declared in the signature but is never
        ; used in the body and no ld.param is expected for it.
     88 define i32 @shfl.sync.iii(i32 %a, i32 %b) {
     89   ; CHECK: ld.param.u32 [[A:%r[0-9]+]]
     90   ; CHECK: shfl.sync.down.b32 [[OUT:%r[0-9]+]], [[A]], 2, 3, 1;
     91   ; CHECK: st.param.{{.}}32 {{.*}}, [[OUT]]
     92   %val = call i32 @llvm.nvvm.shfl.sync.down.i32(i32 1, i32 %a, i32 2, i32 3)
     93   ret i32 %val
     94 }
     95