Home | History | Annotate | Download | only in NVPTX
      1 ; RUN: llc < %s -march=nvptx -mcpu=sm_60 | FileCheck %s -check-prefixes=CHECK,CHECK32
      2 ; RUN: llc < %s -march=nvptx64 -mcpu=sm_60 | FileCheck %s
      3 
      4 ; CHECK-LABEL: .func test_atomics_scope(
      5 define void @test_atomics_scope(float* %fp, float %f,
      6                                 double* %dfp, double %df,
      7                                 i32* %ip, i32 %i,
      8                                 i32* %uip, i32 %ui,
      9                                 i64* %llp, i64 %ll) #0 {
     10 entry:
     11 ; CHECK: atom.cta.add.s32
     12   %tmp36 = tail call i32 @llvm.nvvm.atomic.add.gen.i.cta.i32.p0i32(i32* %ip, i32 %i)
     13 ; CHECK: atom.cta.add.u64
     14   %tmp38 = tail call i64 @llvm.nvvm.atomic.add.gen.i.cta.i64.p0i64(i64* %llp, i64 %ll)
     15 ; CHECK: atom.sys.add.s32
     16   %tmp39 = tail call i32 @llvm.nvvm.atomic.add.gen.i.sys.i32.p0i32(i32* %ip, i32 %i)
     17 ; CHECK: atom.sys.add.u64
     18   %tmp41 = tail call i64 @llvm.nvvm.atomic.add.gen.i.sys.i64.p0i64(i64* %llp, i64 %ll)
     19 ; CHECK: atom.cta.add.f32
     20   %tmp42 = tail call float @llvm.nvvm.atomic.add.gen.f.cta.f32.p0f32(float* %fp, float %f)
     21 ; CHECK: atom.cta.add.f64
     22   %tmp43 = tail call double @llvm.nvvm.atomic.add.gen.f.cta.f64.p0f64(double* %dfp, double %df)
     23 ; CHECK: atom.sys.add.f32
     24   %tmp44 = tail call float @llvm.nvvm.atomic.add.gen.f.sys.f32.p0f32(float* %fp, float %f)
     25 ; CHECK: atom.sys.add.f64
     26   %tmp45 = tail call double @llvm.nvvm.atomic.add.gen.f.sys.f64.p0f64(double* %dfp, double %df)
     27 
     28 ; CHECK: atom.cta.exch.b32
     29   %tmp46 = tail call i32 @llvm.nvvm.atomic.exch.gen.i.cta.i32.p0i32(i32* %ip, i32 %i)
     30 ; CHECK: atom.cta.exch.b64
     31   %tmp48 = tail call i64 @llvm.nvvm.atomic.exch.gen.i.cta.i64.p0i64(i64* %llp, i64 %ll)
     32 ; CHECK: atom.sys.exch.b32
     33   %tmp49 = tail call i32 @llvm.nvvm.atomic.exch.gen.i.sys.i32.p0i32(i32* %ip, i32 %i)
     34 ; CHECK: atom.sys.exch.b64
     35   %tmp51 = tail call i64 @llvm.nvvm.atomic.exch.gen.i.sys.i64.p0i64(i64* %llp, i64 %ll)
     36 
     37 ; CHECK: atom.cta.max.s32
     38   %tmp52 = tail call i32 @llvm.nvvm.atomic.max.gen.i.cta.i32.p0i32(i32* %ip, i32 %i)
     39 ; CHECK: atom.cta.max.s64
     40   %tmp56 = tail call i64 @llvm.nvvm.atomic.max.gen.i.cta.i64.p0i64(i64* %llp, i64 %ll)
     41 ; CHECK: atom.sys.max.s32
     42   %tmp58 = tail call i32 @llvm.nvvm.atomic.max.gen.i.sys.i32.p0i32(i32* %ip, i32 %i)
     43 ; CHECK: atom.sys.max.s64
     44   %tmp62 = tail call i64 @llvm.nvvm.atomic.max.gen.i.sys.i64.p0i64(i64* %llp, i64 %ll)
     45 
     46 ; CHECK: atom.cta.min.s32
     47   %tmp64 = tail call i32 @llvm.nvvm.atomic.min.gen.i.cta.i32.p0i32(i32* %ip, i32 %i)
     48 ; CHECK: atom.cta.min.s64
     49   %tmp68 = tail call i64 @llvm.nvvm.atomic.min.gen.i.cta.i64.p0i64(i64* %llp, i64 %ll)
     50 ; CHECK: atom.sys.min.s32
     51   %tmp70 = tail call i32 @llvm.nvvm.atomic.min.gen.i.sys.i32.p0i32(i32* %ip, i32 %i)
     52 ; CHECK: atom.sys.min.s64
     53   %tmp74 = tail call i64 @llvm.nvvm.atomic.min.gen.i.sys.i64.p0i64(i64* %llp, i64 %ll)
     54 
     55 ; CHECK: atom.cta.inc.u32
     56   %tmp76 = tail call i32 @llvm.nvvm.atomic.inc.gen.i.cta.i32.p0i32(i32* %ip, i32 %i)
     57 ; CHECK: atom.sys.inc.u32
     58   %tmp77 = tail call i32 @llvm.nvvm.atomic.inc.gen.i.sys.i32.p0i32(i32* %ip, i32 %i)
     59 
     60 ; CHECK: atom.cta.dec.u32
     61   %tmp78 = tail call i32 @llvm.nvvm.atomic.dec.gen.i.cta.i32.p0i32(i32* %ip, i32 %i)
     62 ; CHECK: atom.sys.dec.u32
     63   %tmp79 = tail call i32 @llvm.nvvm.atomic.dec.gen.i.sys.i32.p0i32(i32* %ip, i32 %i)
     64 
     65 ; CHECK: atom.cta.and.b32
     66   %tmp80 = tail call i32 @llvm.nvvm.atomic.and.gen.i.cta.i32.p0i32(i32* %ip, i32 %i)
     67 ; CHECK: atom.cta.and.b64
     68   %tmp82 = tail call i64 @llvm.nvvm.atomic.and.gen.i.cta.i64.p0i64(i64* %llp, i64 %ll)
     69 ; CHECK: atom.sys.and.b32
     70   %tmp83 = tail call i32 @llvm.nvvm.atomic.and.gen.i.sys.i32.p0i32(i32* %ip, i32 %i)
     71 ; CHECK: atom.sys.and.b64
     72   %tmp85 = tail call i64 @llvm.nvvm.atomic.and.gen.i.sys.i64.p0i64(i64* %llp, i64 %ll)
     73 
     74 ; CHECK: atom.cta.or.b32
     75   %tmp86 = tail call i32 @llvm.nvvm.atomic.or.gen.i.cta.i32.p0i32(i32* %ip, i32 %i)
     76 ; CHECK: atom.cta.or.b64
     77   %tmp88 = tail call i64 @llvm.nvvm.atomic.or.gen.i.cta.i64.p0i64(i64* %llp, i64 %ll)
     78 ; CHECK: atom.sys.or.b32
     79   %tmp89 = tail call i32 @llvm.nvvm.atomic.or.gen.i.sys.i32.p0i32(i32* %ip, i32 %i)
     80 ; CHECK: atom.sys.or.b64
     81   %tmp91 = tail call i64 @llvm.nvvm.atomic.or.gen.i.sys.i64.p0i64(i64* %llp, i64 %ll)
     82 
     83 ; CHECK: atom.cta.xor.b32
     84   %tmp92 = tail call i32 @llvm.nvvm.atomic.xor.gen.i.cta.i32.p0i32(i32* %ip, i32 %i)
     85 ; CHECK: atom.cta.xor.b64
     86   %tmp94 = tail call i64 @llvm.nvvm.atomic.xor.gen.i.cta.i64.p0i64(i64* %llp, i64 %ll)
     87 ; CHECK: atom.sys.xor.b32
     88   %tmp95 = tail call i32 @llvm.nvvm.atomic.xor.gen.i.sys.i32.p0i32(i32* %ip, i32 %i)
     89 ; CHECK: atom.sys.xor.b64
     90   %tmp97 = tail call i64 @llvm.nvvm.atomic.xor.gen.i.sys.i64.p0i64(i64* %llp, i64 %ll)
     91 
     92 ; CHECK: atom.cta.cas.b32
     93   %tmp98 = tail call i32 @llvm.nvvm.atomic.cas.gen.i.cta.i32.p0i32(i32* %ip, i32 %i, i32 %i)
     94 ; CHECK: atom.cta.cas.b64
     95   %tmp100 = tail call i64 @llvm.nvvm.atomic.cas.gen.i.cta.i64.p0i64(i64* %llp, i64 %ll, i64 %ll)
     96 ; CHECK: atom.sys.cas.b32
     97   %tmp101 = tail call i32 @llvm.nvvm.atomic.cas.gen.i.sys.i32.p0i32(i32* %ip, i32 %i, i32 %i)
     98 ; CHECK: atom.sys.cas.b64
     99   %tmp103 = tail call i64 @llvm.nvvm.atomic.cas.gen.i.sys.i64.p0i64(i64* %llp, i64 %ll, i64 %ll)
    100 
    101   ; CHECK: ret
    102   ret void
    103 }
    104 
    105 ; Make sure we use constants as operands to our scoped atomic calls, where appropriate.
    106 ; CHECK-LABEL: .func test_atomics_scope_imm(
    107 define void @test_atomics_scope_imm(float* %fp, float %f,
    108                                     double* %dfp, double %df,
    109                                     i32* %ip, i32 %i,
    110                                     i32* %uip, i32 %ui,
    111                                     i64* %llp, i64 %ll) #0 {
    112 
    113 ; CHECK: atom.cta.add.s32{{.*}} %r{{[0-9]+}};
    114   %tmp1r = tail call i32 @llvm.nvvm.atomic.add.gen.i.cta.i32.p0i32(i32* %ip, i32 %i)
    115 ; CHECK: atom.cta.add.s32{{.*}}, 1;
    116   %tmp1i = tail call i32 @llvm.nvvm.atomic.add.gen.i.cta.i32.p0i32(i32* %ip, i32 1)
    117 ; CHECK: atom.cta.add.u64{{.*}}, %rd{{[0-9]+}};
    118   %tmp2r = tail call i64 @llvm.nvvm.atomic.add.gen.i.cta.i64.p0i64(i64* %llp, i64 %ll)
    119 ; CHECK: atom.cta.add.u64{{.*}}, 2;
    120   %tmp2i = tail call i64 @llvm.nvvm.atomic.add.gen.i.cta.i64.p0i64(i64* %llp, i64 2)
    121 
    122 ; CHECK: atom.cta.add.f32{{.*}}, %f{{[0-9]+}};
    123   %tmp3r = tail call float @llvm.nvvm.atomic.add.gen.f.cta.f32.p0f32(float* %fp, float %f)
    124 ; CHECK: atom.cta.add.f32{{.*}}, 0f40400000;
    125   %tmp3i = tail call float @llvm.nvvm.atomic.add.gen.f.cta.f32.p0f32(float* %fp, float 3.0)
    126 ; CHECK: atom.cta.add.f64{{.*}}, %fd{{[0-9]+}};
    127   %tmp4r = tail call double @llvm.nvvm.atomic.add.gen.f.cta.f64.p0f64(double* %dfp, double %df)
    128 ; CHECK: atom.cta.add.f64{{.*}}, 0d4010000000000000;
    129   %tmp4i = tail call double @llvm.nvvm.atomic.add.gen.f.cta.f64.p0f64(double* %dfp, double 4.0)
    130 
    131 ; CAS is implemented separately and has more arguments
    132 ; CHECK: atom.cta.cas.b32{{.*}}], %r{{[0-9+]}}, %r{{[0-9+]}};
    133   %tmp5rr = tail call i32 @llvm.nvvm.atomic.cas.gen.i.cta.i32.p0i32(i32* %ip, i32 %i, i32 %i)
    134 ; For some reason in 64-bit mode we end up passing 51 via a register.
    135 ; CHECK32: atom.cta.cas.b32{{.*}}], %r{{[0-9+]}}, 51;
    136   %tmp5ri = tail call i32 @llvm.nvvm.atomic.cas.gen.i.cta.i32.p0i32(i32* %ip, i32 %i, i32 51)
    137 ; CHECK: atom.cta.cas.b32{{.*}}], 52, %r{{[0-9+]}};
    138   %tmp5ir = tail call i32 @llvm.nvvm.atomic.cas.gen.i.cta.i32.p0i32(i32* %ip, i32 52, i32 %i)
    139 ; CHECK: atom.cta.cas.b32{{.*}}], 53, 54;
    140   %tmp5ii = tail call i32 @llvm.nvvm.atomic.cas.gen.i.cta.i32.p0i32(i32* %ip, i32 53, i32 54)
    141 
    142   ; CHECK: ret
    143   ret void
    144 }
    145 
    146 declare i32 @llvm.nvvm.atomic.add.gen.i.cta.i32.p0i32(i32* nocapture, i32) #1
    147 declare i64 @llvm.nvvm.atomic.add.gen.i.cta.i64.p0i64(i64* nocapture, i64) #1
    148 declare i32 @llvm.nvvm.atomic.add.gen.i.sys.i32.p0i32(i32* nocapture, i32) #1
    149 declare i64 @llvm.nvvm.atomic.add.gen.i.sys.i64.p0i64(i64* nocapture, i64) #1
    150 declare float @llvm.nvvm.atomic.add.gen.f.cta.f32.p0f32(float* nocapture, float) #1
    151 declare double @llvm.nvvm.atomic.add.gen.f.cta.f64.p0f64(double* nocapture, double) #1
    152 declare float @llvm.nvvm.atomic.add.gen.f.sys.f32.p0f32(float* nocapture, float) #1
    153 declare double @llvm.nvvm.atomic.add.gen.f.sys.f64.p0f64(double* nocapture, double) #1
    154 declare i32 @llvm.nvvm.atomic.exch.gen.i.cta.i32.p0i32(i32* nocapture, i32) #1
    155 declare i64 @llvm.nvvm.atomic.exch.gen.i.cta.i64.p0i64(i64* nocapture, i64) #1
    156 declare i32 @llvm.nvvm.atomic.exch.gen.i.sys.i32.p0i32(i32* nocapture, i32) #1
    157 declare i64 @llvm.nvvm.atomic.exch.gen.i.sys.i64.p0i64(i64* nocapture, i64) #1
    158 declare i32 @llvm.nvvm.atomic.max.gen.i.cta.i32.p0i32(i32* nocapture, i32) #1
    159 declare i64 @llvm.nvvm.atomic.max.gen.i.cta.i64.p0i64(i64* nocapture, i64) #1
    160 declare i32 @llvm.nvvm.atomic.max.gen.i.sys.i32.p0i32(i32* nocapture, i32) #1
    161 declare i64 @llvm.nvvm.atomic.max.gen.i.sys.i64.p0i64(i64* nocapture, i64) #1
    162 declare i32 @llvm.nvvm.atomic.min.gen.i.cta.i32.p0i32(i32* nocapture, i32) #1
    163 declare i64 @llvm.nvvm.atomic.min.gen.i.cta.i64.p0i64(i64* nocapture, i64) #1
    164 declare i32 @llvm.nvvm.atomic.min.gen.i.sys.i32.p0i32(i32* nocapture, i32) #1
    165 declare i64 @llvm.nvvm.atomic.min.gen.i.sys.i64.p0i64(i64* nocapture, i64) #1
    166 declare i32 @llvm.nvvm.atomic.inc.gen.i.cta.i32.p0i32(i32* nocapture, i32) #1
    167 declare i32 @llvm.nvvm.atomic.inc.gen.i.sys.i32.p0i32(i32* nocapture, i32) #1
    168 declare i32 @llvm.nvvm.atomic.dec.gen.i.cta.i32.p0i32(i32* nocapture, i32) #1
    169 declare i32 @llvm.nvvm.atomic.dec.gen.i.sys.i32.p0i32(i32* nocapture, i32) #1
    170 declare i32 @llvm.nvvm.atomic.and.gen.i.cta.i32.p0i32(i32* nocapture, i32) #1
    171 declare i64 @llvm.nvvm.atomic.and.gen.i.cta.i64.p0i64(i64* nocapture, i64) #1
    172 declare i32 @llvm.nvvm.atomic.and.gen.i.sys.i32.p0i32(i32* nocapture, i32) #1
    173 declare i64 @llvm.nvvm.atomic.and.gen.i.sys.i64.p0i64(i64* nocapture, i64) #1
    174 declare i32 @llvm.nvvm.atomic.or.gen.i.cta.i32.p0i32(i32* nocapture, i32) #1
    175 declare i64 @llvm.nvvm.atomic.or.gen.i.cta.i64.p0i64(i64* nocapture, i64) #1
    176 declare i32 @llvm.nvvm.atomic.or.gen.i.sys.i32.p0i32(i32* nocapture, i32) #1
    177 declare i64 @llvm.nvvm.atomic.or.gen.i.sys.i64.p0i64(i64* nocapture, i64) #1
    178 declare i32 @llvm.nvvm.atomic.xor.gen.i.cta.i32.p0i32(i32* nocapture, i32) #1
    179 declare i64 @llvm.nvvm.atomic.xor.gen.i.cta.i64.p0i64(i64* nocapture, i64) #1
    180 declare i32 @llvm.nvvm.atomic.xor.gen.i.sys.i32.p0i32(i32* nocapture, i32) #1
    181 declare i64 @llvm.nvvm.atomic.xor.gen.i.sys.i64.p0i64(i64* nocapture, i64) #1
    182 declare i32 @llvm.nvvm.atomic.cas.gen.i.cta.i32.p0i32(i32* nocapture, i32, i32) #1
    183 declare i64 @llvm.nvvm.atomic.cas.gen.i.cta.i64.p0i64(i64* nocapture, i64, i64) #1
    184 declare i32 @llvm.nvvm.atomic.cas.gen.i.sys.i32.p0i32(i32* nocapture, i32, i32) #1
    185 declare i64 @llvm.nvvm.atomic.cas.gen.i.sys.i64.p0i64(i64* nocapture, i64, i64) #1
    186 
    187 attributes #1 = { argmemonly nounwind }
    188