1 ; RUN: llc < %s -march=nvptx -mcpu=sm_60 | FileCheck %s -check-prefixes=CHECK,CHECK32 2 ; RUN: llc < %s -march=nvptx64 -mcpu=sm_60 | FileCheck %s 3 4 ; CHECK-LABEL: .func test_atomics_scope( 5 define void @test_atomics_scope(float* %fp, float %f, 6 double* %dfp, double %df, 7 i32* %ip, i32 %i, 8 i32* %uip, i32 %ui, 9 i64* %llp, i64 %ll) #0 { 10 entry: 11 ; CHECK: atom.cta.add.s32 12 %tmp36 = tail call i32 @llvm.nvvm.atomic.add.gen.i.cta.i32.p0i32(i32* %ip, i32 %i) 13 ; CHECK: atom.cta.add.u64 14 %tmp38 = tail call i64 @llvm.nvvm.atomic.add.gen.i.cta.i64.p0i64(i64* %llp, i64 %ll) 15 ; CHECK: atom.sys.add.s32 16 %tmp39 = tail call i32 @llvm.nvvm.atomic.add.gen.i.sys.i32.p0i32(i32* %ip, i32 %i) 17 ; CHECK: atom.sys.add.u64 18 %tmp41 = tail call i64 @llvm.nvvm.atomic.add.gen.i.sys.i64.p0i64(i64* %llp, i64 %ll) 19 ; CHECK: atom.cta.add.f32 20 %tmp42 = tail call float @llvm.nvvm.atomic.add.gen.f.cta.f32.p0f32(float* %fp, float %f) 21 ; CHECK: atom.cta.add.f64 22 %tmp43 = tail call double @llvm.nvvm.atomic.add.gen.f.cta.f64.p0f64(double* %dfp, double %df) 23 ; CHECK: atom.sys.add.f32 24 %tmp44 = tail call float @llvm.nvvm.atomic.add.gen.f.sys.f32.p0f32(float* %fp, float %f) 25 ; CHECK: atom.sys.add.f64 26 %tmp45 = tail call double @llvm.nvvm.atomic.add.gen.f.sys.f64.p0f64(double* %dfp, double %df) 27 28 ; CHECK: atom.cta.exch.b32 29 %tmp46 = tail call i32 @llvm.nvvm.atomic.exch.gen.i.cta.i32.p0i32(i32* %ip, i32 %i) 30 ; CHECK: atom.cta.exch.b64 31 %tmp48 = tail call i64 @llvm.nvvm.atomic.exch.gen.i.cta.i64.p0i64(i64* %llp, i64 %ll) 32 ; CHECK: atom.sys.exch.b32 33 %tmp49 = tail call i32 @llvm.nvvm.atomic.exch.gen.i.sys.i32.p0i32(i32* %ip, i32 %i) 34 ; CHECK: atom.sys.exch.b64 35 %tmp51 = tail call i64 @llvm.nvvm.atomic.exch.gen.i.sys.i64.p0i64(i64* %llp, i64 %ll) 36 37 ; CHECK: atom.cta.max.s32 38 %tmp52 = tail call i32 @llvm.nvvm.atomic.max.gen.i.cta.i32.p0i32(i32* %ip, i32 %i) 39 ; CHECK: atom.cta.max.s64 40 %tmp56 = tail call i64 @llvm.nvvm.atomic.max.gen.i.cta.i64.p0i64(i64* %llp, i64 %ll) 41 ; CHECK: atom.sys.max.s32 42 %tmp58 = tail call i32 @llvm.nvvm.atomic.max.gen.i.sys.i32.p0i32(i32* %ip, i32 %i) 43 ; CHECK: atom.sys.max.s64 44 %tmp62 = tail call i64 @llvm.nvvm.atomic.max.gen.i.sys.i64.p0i64(i64* %llp, i64 %ll) 45 46 ; CHECK: atom.cta.min.s32 47 %tmp64 = tail call i32 @llvm.nvvm.atomic.min.gen.i.cta.i32.p0i32(i32* %ip, i32 %i) 48 ; CHECK: atom.cta.min.s64 49 %tmp68 = tail call i64 @llvm.nvvm.atomic.min.gen.i.cta.i64.p0i64(i64* %llp, i64 %ll) 50 ; CHECK: atom.sys.min.s32 51 %tmp70 = tail call i32 @llvm.nvvm.atomic.min.gen.i.sys.i32.p0i32(i32* %ip, i32 %i) 52 ; CHECK: atom.sys.min.s64 53 %tmp74 = tail call i64 @llvm.nvvm.atomic.min.gen.i.sys.i64.p0i64(i64* %llp, i64 %ll) 54 55 ; CHECK: atom.cta.inc.u32 56 %tmp76 = tail call i32 @llvm.nvvm.atomic.inc.gen.i.cta.i32.p0i32(i32* %ip, i32 %i) 57 ; CHECK: atom.sys.inc.u32 58 %tmp77 = tail call i32 @llvm.nvvm.atomic.inc.gen.i.sys.i32.p0i32(i32* %ip, i32 %i) 59 60 ; CHECK: atom.cta.dec.u32 61 %tmp78 = tail call i32 @llvm.nvvm.atomic.dec.gen.i.cta.i32.p0i32(i32* %ip, i32 %i) 62 ; CHECK: atom.sys.dec.u32 63 %tmp79 = tail call i32 @llvm.nvvm.atomic.dec.gen.i.sys.i32.p0i32(i32* %ip, i32 %i) 64 65 ; CHECK: atom.cta.and.b32 66 %tmp80 = tail call i32 @llvm.nvvm.atomic.and.gen.i.cta.i32.p0i32(i32* %ip, i32 %i) 67 ; CHECK: atom.cta.and.b64 68 %tmp82 = tail call i64 @llvm.nvvm.atomic.and.gen.i.cta.i64.p0i64(i64* %llp, i64 %ll) 69 ; CHECK: atom.sys.and.b32 70 %tmp83 = tail call i32 @llvm.nvvm.atomic.and.gen.i.sys.i32.p0i32(i32* %ip, i32 %i) 71 ; CHECK: atom.sys.and.b64 72 %tmp85 = tail call i64 @llvm.nvvm.atomic.and.gen.i.sys.i64.p0i64(i64* %llp, i64 %ll) 73 74 ; CHECK: atom.cta.or.b32 75 %tmp86 = tail call i32 @llvm.nvvm.atomic.or.gen.i.cta.i32.p0i32(i32* %ip, i32 %i) 76 ; CHECK: atom.cta.or.b64 77 %tmp88 = tail call i64 @llvm.nvvm.atomic.or.gen.i.cta.i64.p0i64(i64* %llp, i64 %ll) 78 ; CHECK: atom.sys.or.b32 79 %tmp89 = tail call i32 @llvm.nvvm.atomic.or.gen.i.sys.i32.p0i32(i32* %ip, i32 %i) 80 ; CHECK: atom.sys.or.b64 81 %tmp91 = tail call i64 @llvm.nvvm.atomic.or.gen.i.sys.i64.p0i64(i64* %llp, i64 %ll) 82 83 ; CHECK: atom.cta.xor.b32 84 %tmp92 = tail call i32 @llvm.nvvm.atomic.xor.gen.i.cta.i32.p0i32(i32* %ip, i32 %i) 85 ; CHECK: atom.cta.xor.b64 86 %tmp94 = tail call i64 @llvm.nvvm.atomic.xor.gen.i.cta.i64.p0i64(i64* %llp, i64 %ll) 87 ; CHECK: atom.sys.xor.b32 88 %tmp95 = tail call i32 @llvm.nvvm.atomic.xor.gen.i.sys.i32.p0i32(i32* %ip, i32 %i) 89 ; CHECK: atom.sys.xor.b64 90 %tmp97 = tail call i64 @llvm.nvvm.atomic.xor.gen.i.sys.i64.p0i64(i64* %llp, i64 %ll) 91 92 ; CHECK: atom.cta.cas.b32 93 %tmp98 = tail call i32 @llvm.nvvm.atomic.cas.gen.i.cta.i32.p0i32(i32* %ip, i32 %i, i32 %i) 94 ; CHECK: atom.cta.cas.b64 95 %tmp100 = tail call i64 @llvm.nvvm.atomic.cas.gen.i.cta.i64.p0i64(i64* %llp, i64 %ll, i64 %ll) 96 ; CHECK: atom.sys.cas.b32 97 %tmp101 = tail call i32 @llvm.nvvm.atomic.cas.gen.i.sys.i32.p0i32(i32* %ip, i32 %i, i32 %i) 98 ; CHECK: atom.sys.cas.b64 99 %tmp103 = tail call i64 @llvm.nvvm.atomic.cas.gen.i.sys.i64.p0i64(i64* %llp, i64 %ll, i64 %ll) 100 101 ; CHECK: ret 102 ret void 103 } 104 105 ; Make sure we use constants as operands to our scoped atomic calls, where appropriate. 106 ; CHECK-LABEL: .func test_atomics_scope_imm( 107 define void @test_atomics_scope_imm(float* %fp, float %f, 108 double* %dfp, double %df, 109 i32* %ip, i32 %i, 110 i32* %uip, i32 %ui, 111 i64* %llp, i64 %ll) #0 { 112 113 ; CHECK: atom.cta.add.s32{{.*}} %r{{[0-9]+}}; 114 %tmp1r = tail call i32 @llvm.nvvm.atomic.add.gen.i.cta.i32.p0i32(i32* %ip, i32 %i) 115 ; CHECK: atom.cta.add.s32{{.*}}, 1; 116 %tmp1i = tail call i32 @llvm.nvvm.atomic.add.gen.i.cta.i32.p0i32(i32* %ip, i32 1) 117 ; CHECK: atom.cta.add.u64{{.*}}, %rd{{[0-9]+}}; 118 %tmp2r = tail call i64 @llvm.nvvm.atomic.add.gen.i.cta.i64.p0i64(i64* %llp, i64 %ll) 119 ; CHECK: atom.cta.add.u64{{.*}}, 2; 120 %tmp2i = tail call i64 @llvm.nvvm.atomic.add.gen.i.cta.i64.p0i64(i64* %llp, i64 2) 121 122 ; CHECK: atom.cta.add.f32{{.*}}, %f{{[0-9]+}}; 123 %tmp3r = tail call float @llvm.nvvm.atomic.add.gen.f.cta.f32.p0f32(float* %fp, float %f) 124 ; CHECK: atom.cta.add.f32{{.*}}, 0f40400000; 125 %tmp3i = tail call float @llvm.nvvm.atomic.add.gen.f.cta.f32.p0f32(float* %fp, float 3.0) 126 ; CHECK: atom.cta.add.f64{{.*}}, %fd{{[0-9]+}}; 127 %tmp4r = tail call double @llvm.nvvm.atomic.add.gen.f.cta.f64.p0f64(double* %dfp, double %df) 128 ; CHECK: atom.cta.add.f64{{.*}}, 0d4010000000000000; 129 %tmp4i = tail call double @llvm.nvvm.atomic.add.gen.f.cta.f64.p0f64(double* %dfp, double 4.0) 130 131 ; CAS is implemented separately and has more arguments 132 ; CHECK: atom.cta.cas.b32{{.*}}], %r{{[0-9+]}}, %r{{[0-9+]}}; 133 %tmp5rr = tail call i32 @llvm.nvvm.atomic.cas.gen.i.cta.i32.p0i32(i32* %ip, i32 %i, i32 %i) 134 ; For some reason in 64-bit mode we end up passing 51 via a register. 135 ; CHECK32: atom.cta.cas.b32{{.*}}], %r{{[0-9+]}}, 51; 136 %tmp5ri = tail call i32 @llvm.nvvm.atomic.cas.gen.i.cta.i32.p0i32(i32* %ip, i32 %i, i32 51) 137 ; CHECK: atom.cta.cas.b32{{.*}}], 52, %r{{[0-9+]}}; 138 %tmp5ir = tail call i32 @llvm.nvvm.atomic.cas.gen.i.cta.i32.p0i32(i32* %ip, i32 52, i32 %i) 139 ; CHECK: atom.cta.cas.b32{{.*}}], 53, 54; 140 %tmp5ii = tail call i32 @llvm.nvvm.atomic.cas.gen.i.cta.i32.p0i32(i32* %ip, i32 53, i32 54) 141 142 ; CHECK: ret 143 ret void 144 } 145 146 declare i32 @llvm.nvvm.atomic.add.gen.i.cta.i32.p0i32(i32* nocapture, i32) #1 147 declare i64 @llvm.nvvm.atomic.add.gen.i.cta.i64.p0i64(i64* nocapture, i64) #1 148 declare i32 @llvm.nvvm.atomic.add.gen.i.sys.i32.p0i32(i32* nocapture, i32) #1 149 declare i64 @llvm.nvvm.atomic.add.gen.i.sys.i64.p0i64(i64* nocapture, i64) #1 150 declare float @llvm.nvvm.atomic.add.gen.f.cta.f32.p0f32(float* nocapture, float) #1 151 declare double @llvm.nvvm.atomic.add.gen.f.cta.f64.p0f64(double* nocapture, double) #1 152 declare float @llvm.nvvm.atomic.add.gen.f.sys.f32.p0f32(float* nocapture, float) #1 153 declare double @llvm.nvvm.atomic.add.gen.f.sys.f64.p0f64(double* nocapture, double) #1 154 declare i32 @llvm.nvvm.atomic.exch.gen.i.cta.i32.p0i32(i32* nocapture, i32) #1 155 declare i64 @llvm.nvvm.atomic.exch.gen.i.cta.i64.p0i64(i64* nocapture, i64) #1 156 declare i32 @llvm.nvvm.atomic.exch.gen.i.sys.i32.p0i32(i32* nocapture, i32) #1 157 declare i64 @llvm.nvvm.atomic.exch.gen.i.sys.i64.p0i64(i64* nocapture, i64) #1 158 declare i32 @llvm.nvvm.atomic.max.gen.i.cta.i32.p0i32(i32* nocapture, i32) #1 159 declare i64 @llvm.nvvm.atomic.max.gen.i.cta.i64.p0i64(i64* nocapture, i64) #1 160 declare i32 @llvm.nvvm.atomic.max.gen.i.sys.i32.p0i32(i32* nocapture, i32) #1 161 declare i64 @llvm.nvvm.atomic.max.gen.i.sys.i64.p0i64(i64* nocapture, i64) #1 162 declare i32 @llvm.nvvm.atomic.min.gen.i.cta.i32.p0i32(i32* nocapture, i32) #1 163 declare i64 @llvm.nvvm.atomic.min.gen.i.cta.i64.p0i64(i64* nocapture, i64) #1 164 declare i32 @llvm.nvvm.atomic.min.gen.i.sys.i32.p0i32(i32* nocapture, i32) #1 165 declare i64 @llvm.nvvm.atomic.min.gen.i.sys.i64.p0i64(i64* nocapture, i64) #1 166 declare i32 @llvm.nvvm.atomic.inc.gen.i.cta.i32.p0i32(i32* nocapture, i32) #1 167 declare i32 @llvm.nvvm.atomic.inc.gen.i.sys.i32.p0i32(i32* nocapture, i32) #1 168 declare i32 @llvm.nvvm.atomic.dec.gen.i.cta.i32.p0i32(i32* nocapture, i32) #1 169 declare i32 @llvm.nvvm.atomic.dec.gen.i.sys.i32.p0i32(i32* nocapture, i32) #1 170 declare i32 @llvm.nvvm.atomic.and.gen.i.cta.i32.p0i32(i32* nocapture, i32) #1 171 declare i64 @llvm.nvvm.atomic.and.gen.i.cta.i64.p0i64(i64* nocapture, i64) #1 172 declare i32 @llvm.nvvm.atomic.and.gen.i.sys.i32.p0i32(i32* nocapture, i32) #1 173 declare i64 @llvm.nvvm.atomic.and.gen.i.sys.i64.p0i64(i64* nocapture, i64) #1 174 declare i32 @llvm.nvvm.atomic.or.gen.i.cta.i32.p0i32(i32* nocapture, i32) #1 175 declare i64 @llvm.nvvm.atomic.or.gen.i.cta.i64.p0i64(i64* nocapture, i64) #1 176 declare i32 @llvm.nvvm.atomic.or.gen.i.sys.i32.p0i32(i32* nocapture, i32) #1 177 declare i64 @llvm.nvvm.atomic.or.gen.i.sys.i64.p0i64(i64* nocapture, i64) #1 178 declare i32 @llvm.nvvm.atomic.xor.gen.i.cta.i32.p0i32(i32* nocapture, i32) #1 179 declare i64 @llvm.nvvm.atomic.xor.gen.i.cta.i64.p0i64(i64* nocapture, i64) #1 180 declare i32 @llvm.nvvm.atomic.xor.gen.i.sys.i32.p0i32(i32* nocapture, i32) #1 181 declare i64 @llvm.nvvm.atomic.xor.gen.i.sys.i64.p0i64(i64* nocapture, i64) #1 182 declare i32 @llvm.nvvm.atomic.cas.gen.i.cta.i32.p0i32(i32* nocapture, i32, i32) #1 183 declare i64 @llvm.nvvm.atomic.cas.gen.i.cta.i64.p0i64(i64* nocapture, i64, i64) #1 184 declare i32 @llvm.nvvm.atomic.cas.gen.i.sys.i32.p0i32(i32* nocapture, i32, i32) #1 185 declare i64 @llvm.nvvm.atomic.cas.gen.i.sys.i64.p0i64(i64* nocapture, i64, i64) #1 186 187 attributes #1 = { argmemonly nounwind } 188