// RUN: %clang_cc1 %s -emit-llvm -o - -triple=armv5-unknown-freebsd -std=c11 | FileCheck %s

// Test that we are generating atomicrmw instructions, rather than
// compare-exchange loops for common atomic ops.  This makes a big difference
// on RISC platforms, where the compare-exchange loop becomes an ll/sc pair for
// the load and then another ll/sc in the loop, expanding to about 30
// instructions when it should be only 4.  It has a smaller, but still
// noticeable, impact on platforms like x86 and RISC-V, where there are atomic
// RMW instructions.
//
// We currently emit cmpxchg loops for most operations on _Bools, because
// they're sufficiently rare that it hasn't been worth verifying that an
// atomicrmw-based lowering would have the correct semantics for them.
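//
// As a rough illustration (a sketch, not part of the FileCheck patterns
// below), `i++` on the _Atomic(int) global should lower to the single
// instruction
//   atomicrmw add i32* @i, i32 1 seq_cst
// whereas a compare-exchange expansion is a retry loop along the lines of
//   %old = load i32* @i
// retry:
//   %new = add i32 %old, 1
//   %prev = cmpxchg i32* @i, i32 %old, i32 %new seq_cst
//   ; compare %prev with %old and branch back to retry on failure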

typedef int __attribute__((vector_size(16))) vector;

_Atomic(_Bool) b;
_Atomic(int) i;
_Atomic(long long) l;
_Atomic(short) s;
_Atomic(char*) p;
_Atomic(float) f;
_Atomic(vector) v;

// CHECK: testinc
void testinc(void)
{
  // Special case for postfix bool++: sets to true and returns the old value.
  // CHECK: atomicrmw xchg i8* @b, i8 1 seq_cst
  b++;
  // CHECK: atomicrmw add i32* @i, i32 1 seq_cst
  i++;
  // CHECK: atomicrmw add i64* @l, i64 1 seq_cst
  l++;
  // CHECK: atomicrmw add i16* @s, i16 1 seq_cst
  s++;
  // Prefix increment
  // Special case for bool: set to true and return true
  // CHECK: store atomic i8 1, i8* @b seq_cst, align 1
  ++b;
  // Currently, we have no variant of atomicrmw that returns the new value, so
  // we have to generate an atomic add, which returns the old value, and then a
  // non-atomic add.
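  // Conceptually (a sketch only, not what FileCheck matches below), ++i
  // amounts to:
  //   int old = __c11_atomic_fetch_add(&i, 1, __ATOMIC_SEQ_CST);
  //   int result = old + 1;  // non-atomic add producing the value of ++i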
  // CHECK: atomicrmw add i32* @i, i32 1 seq_cst
  // CHECK: add i32
  ++i;
  // CHECK: atomicrmw add i64* @l, i64 1 seq_cst
  // CHECK: add i64
  ++l;
  // CHECK: atomicrmw add i16* @s, i16 1 seq_cst
  // CHECK: add i16
  ++s;
}
// CHECK: testdec
void testdec(void)
{
  // CHECK: cmpxchg i8* @b
  b--;
  // CHECK: atomicrmw sub i32* @i, i32 1 seq_cst
  i--;
  // CHECK: atomicrmw sub i64* @l, i64 1 seq_cst
  l--;
  // CHECK: atomicrmw sub i16* @s, i16 1 seq_cst
  s--;
  // CHECK: cmpxchg i8* @b
  --b;
  // CHECK: atomicrmw sub i32* @i, i32 1 seq_cst
  // CHECK: sub i32
  --i;
  // CHECK: atomicrmw sub i64* @l, i64 1 seq_cst
  // CHECK: sub i64
  --l;
  // CHECK: atomicrmw sub i16* @s, i16 1 seq_cst
  // CHECK: sub i16
  --s;
}
// CHECK: testaddeq
void testaddeq(void)
{
  // CHECK: cmpxchg i8* @b
  // CHECK: atomicrmw add i32* @i, i32 42 seq_cst
  // CHECK: atomicrmw add i64* @l, i64 42 seq_cst
  // CHECK: atomicrmw add i16* @s, i16 42 seq_cst
  b += 42;
  i += 42;
  l += 42;
  s += 42;
}
// CHECK: testsubeq
void testsubeq(void)
{
  // CHECK: cmpxchg i8* @b
  // CHECK: atomicrmw sub i32* @i, i32 42 seq_cst
  // CHECK: atomicrmw sub i64* @l, i64 42 seq_cst
  // CHECK: atomicrmw sub i16* @s, i16 42 seq_cst
  b -= 42;
  i -= 42;
  l -= 42;
  s -= 42;
}
// CHECK: testxoreq
void testxoreq(void)
{
  // CHECK: cmpxchg i8* @b
  // CHECK: atomicrmw xor i32* @i, i32 42 seq_cst
  // CHECK: atomicrmw xor i64* @l, i64 42 seq_cst
  // CHECK: atomicrmw xor i16* @s, i16 42 seq_cst
  b ^= 42;
  i ^= 42;
  l ^= 42;
  s ^= 42;
}
// CHECK: testoreq
void testoreq(void)
{
  // CHECK: cmpxchg i8* @b
  // CHECK: atomicrmw or i32* @i, i32 42 seq_cst
  // CHECK: atomicrmw or i64* @l, i64 42 seq_cst
  // CHECK: atomicrmw or i16* @s, i16 42 seq_cst
  b |= 42;
  i |= 42;
  l |= 42;
  s |= 42;
}
// CHECK: testandeq
void testandeq(void)
{
  // CHECK: cmpxchg i8* @b
  // CHECK: atomicrmw and i32* @i, i32 42 seq_cst
  // CHECK: atomicrmw and i64* @l, i64 42 seq_cst
  // CHECK: atomicrmw and i16* @s, i16 42 seq_cst
  b &= 42;
  i &= 42;
  l &= 42;
  s &= 42;
}

// CHECK: define arm_aapcscc void @testFloat(float*
void testFloat(_Atomic(float) *fp) {
// CHECK:      [[FP:%.*]] = alloca float*
// CHECK-NEXT: [[X:%.*]] = alloca float
// CHECK-NEXT: [[F:%.*]] = alloca float
// CHECK-NEXT: [[TMP0:%.*]] = alloca float
// CHECK-NEXT: [[TMP1:%.*]] = alloca float
// CHECK-NEXT: store float* {{%.*}}, float** [[FP]]

// CHECK-NEXT: [[T0:%.*]] = load float** [[FP]]
// CHECK-NEXT: store float 1.000000e+00, float* [[T0]], align 4
  __c11_atomic_init(fp, 1.0f);

// CHECK-NEXT: store float 2.000000e+00, float* [[X]], align 4
  _Atomic(float) x = 2.0f;

// CHECK-NEXT: [[T0:%.*]] = load float** [[FP]]
// CHECK-NEXT: [[T1:%.*]] = bitcast float* [[T0]] to i8*
// CHECK-NEXT: [[T2:%.*]] = bitcast float* [[TMP0]] to i8*
// CHECK-NEXT: call arm_aapcscc void @__atomic_load(i32 4, i8* [[T1]], i8* [[T2]], i32 5)
// CHECK-NEXT: [[T3:%.*]] = load float* [[TMP0]], align 4
// CHECK-NEXT: store float [[T3]], float* [[F]]
  float f = *fp;

// CHECK-NEXT: [[T0:%.*]] = load float* [[F]], align 4
// CHECK-NEXT: [[T1:%.*]] = load float** [[FP]], align 4
// CHECK-NEXT: store float [[T0]], float* [[TMP1]], align 4
// CHECK-NEXT: [[T2:%.*]] = bitcast float* [[T1]] to i8*
// CHECK-NEXT: [[T3:%.*]] = bitcast float* [[TMP1]] to i8*
// CHECK-NEXT: call arm_aapcscc void @__atomic_store(i32 4, i8* [[T2]], i8* [[T3]], i32 5)
  *fp = f;

// CHECK-NEXT: ret void
}

// CHECK: define arm_aapcscc void @testComplexFloat([[CF:{ float, float }]]*
void testComplexFloat(_Atomic(_Complex float) *fp) {
// CHECK:      [[FP:%.*]] = alloca [[CF]]*, align 4
// CHECK-NEXT: [[X:%.*]] = alloca [[CF]], align 8
// CHECK-NEXT: [[F:%.*]] = alloca [[CF]], align 4
// CHECK-NEXT: [[TMP0:%.*]] = alloca [[CF]], align 8
// CHECK-NEXT: [[TMP1:%.*]] = alloca [[CF]], align 8
// CHECK-NEXT: store [[CF]]*

// CHECK-NEXT: [[P:%.*]] = load [[CF]]** [[FP]]
// CHECK-NEXT: [[T0:%.*]] = getelementptr inbounds [[CF]]* [[P]], i32 0, i32 0
// CHECK-NEXT: [[T1:%.*]] = getelementptr inbounds [[CF]]* [[P]], i32 0, i32 1
// CHECK-NEXT: store float 1.000000e+00, float* [[T0]]
// CHECK-NEXT: store float 0.000000e+00, float* [[T1]]
  __c11_atomic_init(fp, 1.0f);

// CHECK-NEXT: [[T0:%.*]] = getelementptr inbounds [[CF]]* [[X]], i32 0, i32 0
// CHECK-NEXT: [[T1:%.*]] = getelementptr inbounds [[CF]]* [[X]], i32 0, i32 1
// CHECK-NEXT: store float 2.000000e+00, float* [[T0]]
// CHECK-NEXT: store float 0.000000e+00, float* [[T1]]
  _Atomic(_Complex float) x = 2.0f;

// CHECK-NEXT: [[T0:%.*]] = load [[CF]]** [[FP]]
// CHECK-NEXT: [[T1:%.*]] = bitcast [[CF]]* [[T0]] to i8*
// CHECK-NEXT: [[T2:%.*]] = bitcast [[CF]]* [[TMP0]] to i8*
// CHECK-NEXT: call arm_aapcscc void @__atomic_load(i32 8, i8* [[T1]], i8* [[T2]], i32 5)
// CHECK-NEXT: [[T0:%.*]] = getelementptr inbounds [[CF]]* [[TMP0]], i32 0, i32 0
// CHECK-NEXT: [[R:%.*]] = load float* [[T0]]
// CHECK-NEXT: [[T0:%.*]] = getelementptr inbounds [[CF]]* [[TMP0]], i32 0, i32 1
// CHECK-NEXT: [[I:%.*]] = load float* [[T0]]
// CHECK-NEXT: [[T0:%.*]] = getelementptr inbounds [[CF]]* [[F]], i32 0, i32 0
// CHECK-NEXT: [[T1:%.*]] = getelementptr inbounds [[CF]]* [[F]], i32 0, i32 1
// CHECK-NEXT: store float [[R]], float* [[T0]]
// CHECK-NEXT: store float [[I]], float* [[T1]]
  _Complex float f = *fp;

// CHECK-NEXT: [[T0:%.*]] = getelementptr inbounds [[CF]]* [[F]], i32 0, i32 0
// CHECK-NEXT: [[R:%.*]] = load float* [[T0]]
// CHECK-NEXT: [[T0:%.*]] = getelementptr inbounds [[CF]]* [[F]], i32 0, i32 1
// CHECK-NEXT: [[I:%.*]] = load float* [[T0]]
// CHECK-NEXT: [[DEST:%.*]] = load [[CF]]** [[FP]], align 4
// CHECK-NEXT: [[T0:%.*]] = getelementptr inbounds [[CF]]* [[TMP1]], i32 0, i32 0
// CHECK-NEXT: [[T1:%.*]] = getelementptr inbounds [[CF]]* [[TMP1]], i32 0, i32 1
// CHECK-NEXT: store float [[R]], float* [[T0]]
// CHECK-NEXT: store float [[I]], float* [[T1]]
// CHECK-NEXT: [[T0:%.*]] = bitcast [[CF]]* [[DEST]] to i8*
// CHECK-NEXT: [[T1:%.*]] = bitcast [[CF]]* [[TMP1]] to i8*
// CHECK-NEXT: call arm_aapcscc void @__atomic_store(i32 8, i8* [[T0]], i8* [[T1]], i32 5)
  *fp = f;

// CHECK-NEXT: ret void
}

typedef struct { short x, y, z, w; } S;
// CHECK: define arm_aapcscc void @testStruct([[S:.*]]*
void testStruct(_Atomic(S) *fp) {
// CHECK:      [[FP:%.*]] = alloca [[S]]*, align 4
// CHECK-NEXT: [[X:%.*]] = alloca [[S]], align 8
// CHECK-NEXT: [[F:%.*]] = alloca [[S:%.*]], align 2
// CHECK-NEXT: [[TMP0:%.*]] = alloca [[S]], align 8
// CHECK-NEXT: store [[S]]*

// CHECK-NEXT: [[P:%.*]] = load [[S]]** [[FP]]
// CHECK-NEXT: [[T0:%.*]] = getelementptr inbounds [[S]]* [[P]], i32 0, i32 0
// CHECK-NEXT: store i16 1, i16* [[T0]], align 2
// CHECK-NEXT: [[T0:%.*]] = getelementptr inbounds [[S]]* [[P]], i32 0, i32 1
// CHECK-NEXT: store i16 2, i16* [[T0]], align 2
// CHECK-NEXT: [[T0:%.*]] = getelementptr inbounds [[S]]* [[P]], i32 0, i32 2
// CHECK-NEXT: store i16 3, i16* [[T0]], align 2
// CHECK-NEXT: [[T0:%.*]] = getelementptr inbounds [[S]]* [[P]], i32 0, i32 3
// CHECK-NEXT: store i16 4, i16* [[T0]], align 2
  __c11_atomic_init(fp, (S){1,2,3,4});

// CHECK-NEXT: [[T0:%.*]] = getelementptr inbounds [[S]]* [[X]], i32 0, i32 0
// CHECK-NEXT: store i16 1, i16* [[T0]], align 2
// CHECK-NEXT: [[T0:%.*]] = getelementptr inbounds [[S]]* [[X]], i32 0, i32 1
// CHECK-NEXT: store i16 2, i16* [[T0]], align 2
// CHECK-NEXT: [[T0:%.*]] = getelementptr inbounds [[S]]* [[X]], i32 0, i32 2
// CHECK-NEXT: store i16 3, i16* [[T0]], align 2
// CHECK-NEXT: [[T0:%.*]] = getelementptr inbounds [[S]]* [[X]], i32 0, i32 3
// CHECK-NEXT: store i16 4, i16* [[T0]], align 2
  _Atomic(S) x = (S){1,2,3,4};

// CHECK-NEXT: [[T0:%.*]] = load [[S]]** [[FP]]
// CHECK-NEXT: [[T1:%.*]] = bitcast [[S]]* [[T0]] to i8*
// CHECK-NEXT: [[T2:%.*]] = bitcast [[S]]* [[F]] to i8*
// CHECK-NEXT: call arm_aapcscc void @__atomic_load(i32 8, i8* [[T1]], i8* [[T2]], i32 5)
  S f = *fp;

// CHECK-NEXT: [[T0:%.*]] = load [[S]]** [[FP]]
// CHECK-NEXT: [[T1:%.*]] = bitcast [[S]]* [[TMP0]] to i8*
// CHECK-NEXT: [[T2:%.*]] = bitcast [[S]]* [[F]] to i8*
// CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[T1]], i8* [[T2]], i32 8, i32 2, i1 false)
// CHECK-NEXT: [[T3:%.*]] = bitcast [[S]]* [[T0]] to i8*
// CHECK-NEXT: [[T4:%.*]] = bitcast [[S]]* [[TMP0]] to i8*
// CHECK-NEXT: call arm_aapcscc void @__atomic_store(i32 8, i8* [[T3]], i8* [[T4]], i32 5)
  *fp = f;

// CHECK-NEXT: ret void
}

typedef struct { short x, y, z; } PS;
// CHECK: define arm_aapcscc void @testPromotedStruct([[APS:.*]]*
void testPromotedStruct(_Atomic(PS) *fp) {
// CHECK:      [[FP:%.*]] = alloca [[APS]]*, align 4
// CHECK-NEXT: [[X:%.*]] = alloca [[APS]], align 8
// CHECK-NEXT: [[F:%.*]] = alloca [[PS:%.*]], align 2
// CHECK-NEXT: [[TMP0:%.*]] = alloca [[APS]], align 8
// CHECK-NEXT: [[TMP1:%.*]] = alloca [[APS]], align 8
// CHECK-NEXT: [[A:%.*]] = alloca i32, align 4
// CHECK-NEXT: [[TMP2:%.*]] = alloca %struct.PS, align 2
// CHECK-NEXT: [[TMP3:%.*]] = alloca [[APS]], align 8
// CHECK-NEXT: store [[APS]]*

// CHECK-NEXT: [[P:%.*]] = load [[APS]]** [[FP]]
// CHECK-NEXT: [[T0:%.*]] = bitcast [[APS]]* [[P]] to i8*
// CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* [[T0]], i8 0, i64 8, i32 8, i1 false)
// CHECK-NEXT: [[T0:%.*]] = getelementptr inbounds [[APS]]* [[P]], i32 0, i32 0
// CHECK-NEXT: [[T1:%.*]] = getelementptr inbounds [[PS]]* [[T0]], i32 0, i32 0
// CHECK-NEXT: store i16 1, i16* [[T1]], align 2
// CHECK-NEXT: [[T1:%.*]] = getelementptr inbounds [[PS]]* [[T0]], i32 0, i32 1
// CHECK-NEXT: store i16 2, i16* [[T1]], align 2
// CHECK-NEXT: [[T1:%.*]] = getelementptr inbounds [[PS]]* [[T0]], i32 0, i32 2
// CHECK-NEXT: store i16 3, i16* [[T1]], align 2
  __c11_atomic_init(fp, (PS){1,2,3});

// CHECK-NEXT: [[T0:%.*]] = bitcast [[APS]]* [[X]] to i8*
// CHECK-NEXT: call void @llvm.memset.p0i8.i32(i8* [[T0]], i8 0, i32 8, i32 8, i1 false)
// CHECK-NEXT: [[T0:%.*]] = getelementptr inbounds [[APS]]* [[X]], i32 0, i32 0
// CHECK-NEXT: [[T1:%.*]] = getelementptr inbounds [[PS]]* [[T0]], i32 0, i32 0
// CHECK-NEXT: store i16 1, i16* [[T1]], align 2
// CHECK-NEXT: [[T1:%.*]] = getelementptr inbounds [[PS]]* [[T0]], i32 0, i32 1
// CHECK-NEXT: store i16 2, i16* [[T1]], align 2
// CHECK-NEXT: [[T1:%.*]] = getelementptr inbounds [[PS]]* [[T0]], i32 0, i32 2
// CHECK-NEXT: store i16 3, i16* [[T1]], align 2
  _Atomic(PS) x = (PS){1,2,3};

// CHECK-NEXT: [[T0:%.*]] = load [[APS]]** [[FP]]
// CHECK-NEXT: [[T1:%.*]] = bitcast [[APS]]* [[T0]] to i8*
// CHECK-NEXT: [[T2:%.*]] = bitcast [[APS]]* [[TMP0]] to i8*
// CHECK-NEXT: call arm_aapcscc void @__atomic_load(i32 8, i8* [[T1]], i8* [[T2]], i32 5)
// CHECK-NEXT: [[T0:%.*]] = getelementptr inbounds [[APS]]* [[TMP0]], i32 0, i32 0
// CHECK-NEXT: [[T1:%.*]] = bitcast [[PS]]* [[F]] to i8*
// CHECK-NEXT: [[T2:%.*]] = bitcast [[PS]]* [[T0]] to i8*
// CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[T1]], i8* [[T2]], i32 6, i32 2, i1 false)
  PS f = *fp;

// CHECK-NEXT: [[T0:%.*]] = load [[APS]]** [[FP]]
// CHECK-NEXT: [[T1:%.*]] = bitcast { %struct.PS, [2 x i8] }* [[TMP1]] to i8*
// CHECK-NEXT: call void @llvm.memset.p0i8.i32(i8* [[T1]], i8 0, i32 8, i32 8, i1 false)
// CHECK-NEXT: [[T1:%.*]] = getelementptr inbounds [[APS]]* [[TMP1]], i32 0, i32 0
// CHECK-NEXT: [[T2:%.*]] = bitcast [[PS]]* [[T1]] to i8*
// CHECK-NEXT: [[T3:%.*]] = bitcast [[PS]]* [[F]] to i8*
// CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[T2]], i8* [[T3]], i32 6, i32 2, i1 false)
// CHECK-NEXT: [[T4:%.*]] = bitcast [[APS]]* [[T0]] to i8*
// CHECK-NEXT: [[T5:%.*]] = bitcast [[APS]]* [[TMP1]] to i8*
// CHECK-NEXT: call arm_aapcscc void @__atomic_store(i32 8, i8* [[T4]], i8* [[T5]], i32 5)
  *fp = f;

// CHECK-NEXT: [[T0:%.*]] = load [[APS]]** [[FP]], align 4
// CHECK-NEXT: [[T1:%.*]] = bitcast [[APS]]* [[T0]] to i8*
// CHECK-NEXT: [[T2:%.*]] = bitcast [[APS]]* [[TMP3]] to i8*
// CHECK-NEXT: call arm_aapcscc void @__atomic_load(i32 8, i8* [[T1]], i8* [[T2]], i32 5)
// CHECK-NEXT: [[T0:%.*]] = getelementptr inbounds [[APS]]* [[TMP3]], i32 0, i32 0
// CHECK-NEXT: [[T1:%.*]] = bitcast %struct.PS* [[TMP2]] to i8*
// CHECK-NEXT: [[T2:%.*]] = bitcast %struct.PS* [[T0]] to i8*
// CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[T1]], i8* [[T2]], i32 6, i32 2, i1 false)
// CHECK-NEXT: [[T0:%.*]] = getelementptr inbounds %struct.PS* [[TMP2]], i32 0, i32 0
// CHECK-NEXT: [[T1:%.*]] = load i16* [[T0]], align 2
// CHECK-NEXT: [[T2:%.*]] = sext i16 [[T1]] to i32
// CHECK-NEXT: store i32 [[T2]], i32* [[A]], align 4
  int a = ((PS)*fp).x;

// CHECK-NEXT: ret void
}

// CHECK: define arm_aapcscc void @testPromotedStructOps([[APS:.*]]*

// FIXME: none of these look right, but we can leave the "test" here
// to make sure they at least don't crash.
void testPromotedStructOps(_Atomic(PS) *p) {
  PS a = __c11_atomic_load(p, 5);
  __c11_atomic_store(p, a, 5);
  PS b = __c11_atomic_exchange(p, a, 5);
  _Bool v = __c11_atomic_compare_exchange_strong(p, &b, a, 5, 5);
  v = __c11_atomic_compare_exchange_weak(p, &b, a, 5, 5);
}