; RUN: llc < %s -march=nvptx -mcpu=sm_20 | FileCheck --check-prefix=SM20 %s
; RUN: llc < %s -march=nvptx -mcpu=sm_35 | FileCheck --check-prefix=SM35 %s

; Check that the NVVM rotate intrinsics, plus a plain shl/lshr/or rotate
; pattern, are lowered to a shift-based expansion on sm_20 and to the
; funnel-shift instruction shf.{l,r}.wrap on sm_35. A sketch of the sm_20
; expansion follows the tests at the end of this file.

declare i32 @llvm.nvvm.rotate.b32(i32, i32)
declare i64 @llvm.nvvm.rotate.b64(i64, i32)
declare i64 @llvm.nvvm.rotate.right.b64(i64, i32)

; SM20: rotate32
; SM35: rotate32
define i32 @rotate32(i32 %a, i32 %b) {
; SM20: shl.b32
; SM20: sub.s32
; SM20: shr.b32
; SM20: add.u32
; SM35: shf.l.wrap.b32
  %val = tail call i32 @llvm.nvvm.rotate.b32(i32 %a, i32 %b)
  ret i32 %val
}

; SM20: rotate64
; SM35: rotate64
define i64 @rotate64(i64 %a, i32 %b) {
; SM20: shl.b64
; SM20: sub.u32
; SM20: shr.b64
; SM20: add.u64
; SM35: shf.l.wrap.b32
; SM35: shf.l.wrap.b32
  %val = tail call i64 @llvm.nvvm.rotate.b64(i64 %a, i32 %b)
  ret i64 %val
}

; SM20: rotateright64
; SM35: rotateright64
define i64 @rotateright64(i64 %a, i32 %b) {
; SM20: shr.b64
; SM20: sub.u32
; SM20: shl.b64
; SM20: add.u64
; SM35: shf.r.wrap.b32
; SM35: shf.r.wrap.b32
  %val = tail call i64 @llvm.nvvm.rotate.right.b64(i64 %a, i32 %b)
  ret i64 %val
}

; SM20: rotl0
; SM35: rotl0
define i32 @rotl0(i32 %x) {
; SM20: shl.b32
; SM20: shr.b32
; SM20: add.u32
; SM35: shf.l.wrap.b32
  %t0 = shl i32 %x, 8
  %t1 = lshr i32 %x, 24
  %t2 = or i32 %t0, %t1
  ret i32 %t2
}
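
; A minimal sketch of the sm_20 expansion that the SM20 lines check for
; @rotate32; the opcode order comes from the CHECK lines above, while the
; register names and exact operands are hypothetical:
;
;   shl.b32 %t0, %a, %b;     // %a << %b
;   sub.s32 %t1, 32, %b;     // 32 - %b
;   shr.b32 %t2, %a, %t1;    // %a >> (32 - %b)
;   add.u32 %val, %t0, %t2;  // the two shifted halves do not overlap,
;                            // so add acts as or: rotl(%a, %b)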