// RUN: %clang_cc1 %s -emit-llvm -o - -triple=armv5-unknown-freebsd -std=c11 | FileCheck %s

// Test that we are generating atomicrmw instructions, rather than
// compare-exchange loops for common atomic ops.  This makes a big difference
// on RISC platforms, where the compare-exchange loop becomes an ll/sc pair for
// the load and then another ll/sc in the loop, expanding to about 30
// instructions when it should be only 4.  It has a smaller, but still
// noticeable, impact on platforms like x86 and RISC-V, where there are atomic
// RMW instructions.
//
// We currently emit cmpxchg loops for most operations on _Bools, because
// they're sufficiently rare that it's not worth making sure that the semantics
// are correct.
//
// The functions from testFloat onwards have a different purpose: they pin the
// exact IR sequence expected for initialising, loading and storing atomic
// float, complex-float and struct objects.  In the expected output those
// loads and stores go through calls to __atomic_load / __atomic_store.

// 16-byte vector of int, used only as the element type of the atomic global v.
typedef int __attribute__((vector_size(16))) vector;

// File-scope atomic objects.  b, i, l and s are the operands of the
// increment, decrement and compound-assignment tests below.  p, f and v are
// not referenced by any function visible in this file (the later uses of the
// names f, p and v are locals or parameters that shadow these globals).
_Atomic(_Bool) b;
_Atomic(int) i;
_Atomic(long long) l;
_Atomic(short) s;
_Atomic(char*) p;
_Atomic(float) f;
_Atomic(vector) v;

// Postfix, then prefix, increment of b, i, l and s.
// CHECK: testinc
void testinc(void)
{
  // Special case for postfix bool++, sets to true and returns the old value.
  // CHECK: atomicrmw xchg i8* @b, i8 1 seq_cst
  b++;
  // CHECK: atomicrmw add i32* @i, i32 1 seq_cst
  i++;
  // CHECK: atomicrmw add i64* @l, i64 1 seq_cst
  l++;
  // CHECK: atomicrmw add i16* @s, i16 1 seq_cst
  s++;
  // Prefix increment
  // Special case for bool: set to true and return true
  // CHECK: store atomic i8 1, i8* @b seq_cst, align 1
  ++b;
  // Currently, we have no variant of atomicrmw that returns the new value, so
  // we have to generate an atomic add, which returns the old value, and then a
  // non-atomic add.
  // CHECK: atomicrmw add i32* @i, i32 1 seq_cst
  // CHECK: add i32
  ++i;
  // CHECK: atomicrmw add i64* @l, i64 1 seq_cst
  // CHECK: add i64
  ++l;
  // CHECK: atomicrmw add i16* @s, i16 1 seq_cst
  // CHECK: add i16
  ++s;
}

// Postfix, then prefix, decrement of b, i, l and s.  Unlike increment, both
// _Bool decrements are expected to be emitted as a cmpxchg on @b.  For the
// prefix forms the expected output is an atomicrmw sub followed by a plain
// sub of the same width.
// CHECK: testdec
void testdec(void)
{
  // CHECK: cmpxchg i8* @b
  b--;
  // CHECK: atomicrmw sub i32* @i, i32 1 seq_cst
  i--;
  // CHECK: atomicrmw sub i64* @l, i64 1 seq_cst
  l--;
  // CHECK: atomicrmw sub i16* @s, i16 1 seq_cst
  s--;
  // CHECK: cmpxchg i8* @b
  --b;
  // CHECK: atomicrmw sub i32* @i, i32 1 seq_cst
  // CHECK: sub i32
  --i;
  // CHECK: atomicrmw sub i64* @l, i64 1 seq_cst
  // CHECK: sub i64
  --l;
  // CHECK: atomicrmw sub i16* @s, i16 1 seq_cst
  // CHECK: sub i16
  --s;
}

// Compound += 42 on b, i, l and s.  The _Bool is expected to use cmpxchg; the
// three integer globals are expected to use a seq_cst atomicrmw add.  The
// expected-output lines are listed up front, in statement order.
// CHECK: testaddeq
void testaddeq(void)
{
  // CHECK: cmpxchg i8* @b
  // CHECK: atomicrmw add i32* @i, i32 42 seq_cst
  // CHECK: atomicrmw add i64* @l, i64 42 seq_cst
  // CHECK: atomicrmw add i16* @s, i16 42 seq_cst
  b += 42;
  i += 42;
  l += 42;
  s += 42;
}

// Compound -= 42 on b, i, l and s; same shape as testaddeq, with atomicrmw sub.
// CHECK: testsubeq
void testsubeq(void)
{
  // CHECK: cmpxchg i8* @b
  // CHECK: atomicrmw sub i32* @i, i32 42 seq_cst
  // CHECK: atomicrmw sub i64* @l, i64 42 seq_cst
  // CHECK: atomicrmw sub i16* @s, i16 42 seq_cst
  b -= 42;
  i -= 42;
  l -= 42;
  s -= 42;
}

// Compound ^= 42 on b, i, l and s; same shape as testaddeq, with atomicrmw xor.
// CHECK: testxoreq
void testxoreq(void)
{
  // CHECK: cmpxchg i8* @b
  // CHECK: atomicrmw xor i32* @i, i32 42 seq_cst
  // CHECK: atomicrmw xor i64* @l, i64 42 seq_cst
  // CHECK: atomicrmw xor i16* @s, i16 42 seq_cst
  b ^= 42;
  i ^= 42;
  l ^= 42;
  s ^= 42;
}

// Compound |= 42 on b, i, l and s; same shape as testaddeq, with atomicrmw or.
// CHECK: testoreq
void testoreq(void)
{
  // CHECK: cmpxchg i8* @b
  // CHECK: atomicrmw or i32* @i, i32 42 seq_cst
  // CHECK: atomicrmw or i64* @l, i64 42 seq_cst
  // CHECK: atomicrmw or i16* @s, i16 42 seq_cst
  b |= 42;
  i |= 42;
  l |= 42;
  s |= 42;
}

// Compound &= 42 on b, i, l and s; same shape as testaddeq, with atomicrmw and.
// CHECK: testandeq
void testandeq(void)
{
  // CHECK: cmpxchg i8* @b
  // CHECK: atomicrmw and i32* @i, i32 42 seq_cst
  // CHECK: atomicrmw and i64* @l, i64 42 seq_cst
  // CHECK: atomicrmw and i16* @s, i16 42 seq_cst
  b &= 42;
  i &= 42;
  l &= 42;
  s &= 42;
}

// Initialisation, load and store of an atomic float reached through a
// pointer parameter.  The expected IR stores the initial values directly,
// while the load and the store each go through a 4-byte temporary and a call
// to __atomic_load / __atomic_store.  The trailing `i32 5` argument of those
// calls is presumably the seq_cst memory-order value -- TODO confirm.
// CHECK: define arm_aapcscc void @testFloat(float*
void testFloat(_Atomic(float) *fp) {
  // Expected stack slots, in order: the parameter, x, f, and one temporary
  // each for the atomic load and the atomic store.
  // CHECK: [[FP:%.*]] = alloca float*
  // CHECK-NEXT: [[X:%.*]] = alloca float
  // CHECK-NEXT: [[F:%.*]] = alloca float
  // CHECK-NEXT: [[TMP0:%.*]] = alloca float
  // CHECK-NEXT: [[TMP1:%.*]] = alloca float
  // CHECK-NEXT: store float* {{%.*}}, float** [[FP]]

  // CHECK-NEXT: [[T0:%.*]] = load float** [[FP]]
  // CHECK-NEXT: store float 1.000000e+00, float* [[T0]], align 4
  __c11_atomic_init(fp, 1.0f);

  // CHECK-NEXT: store float 2.000000e+00, float* [[X]], align 4
  _Atomic(float) x = 2.0f;

  // CHECK-NEXT: [[T0:%.*]] = load float** [[FP]]
  // CHECK-NEXT: [[T1:%.*]] = bitcast float* [[T0]] to i8*
  // CHECK-NEXT: [[T2:%.*]] = bitcast float* [[TMP0]] to i8*
  // CHECK-NEXT: call arm_aapcscc void @__atomic_load(i32 4, i8* [[T1]], i8* [[T2]], i32 5)
  // CHECK-NEXT: [[T3:%.*]] = load float* [[TMP0]], align 4
  // CHECK-NEXT: store float [[T3]], float* [[F]]
  float f = *fp;

  // CHECK-NEXT: [[T0:%.*]] = load float* [[F]], align 4
  // CHECK-NEXT: [[T1:%.*]] = load float** [[FP]], align 4
  // CHECK-NEXT: store float [[T0]], float* [[TMP1]], align 4
  // CHECK-NEXT: [[T2:%.*]] = bitcast float* [[T1]] to i8*
  // CHECK-NEXT: [[T3:%.*]] = bitcast float* [[TMP1]] to i8*
  // CHECK-NEXT: call arm_aapcscc void @__atomic_store(i32 4, i8* [[T2]], i8* [[T3]], i32 5)
  *fp = f;

  // CHECK-NEXT: ret void
}

// Same sequence as testFloat for an atomic _Complex float.  The expected IR
// represents the value as { float, float }, initialises the real and
// imaginary parts with separate stores, and passes size 8 to the
// __atomic_load / __atomic_store calls.
// CHECK: define arm_aapcscc void @testComplexFloat([[CF:{ float, float }]]*
void testComplexFloat(_Atomic(_Complex float) *fp) {
  // The atomic local x and both temporaries are expected at alignment 8,
  // whereas the plain _Complex float local f is expected at alignment 4.
  // CHECK: [[FP:%.*]] = alloca [[CF]]*, align 4
  // CHECK-NEXT: [[X:%.*]] = alloca [[CF]], align 8
  // CHECK-NEXT: [[F:%.*]] = alloca [[CF]], align 4
  // CHECK-NEXT: [[TMP0:%.*]] = alloca [[CF]], align 8
  // CHECK-NEXT: [[TMP1:%.*]] = alloca [[CF]], align 8
  // CHECK-NEXT: store [[CF]]*

  // CHECK-NEXT: [[P:%.*]] = load [[CF]]** [[FP]]
  // CHECK-NEXT: [[T0:%.*]] = getelementptr inbounds [[CF]]* [[P]], i32 0, i32 0
  // CHECK-NEXT: [[T1:%.*]] = getelementptr inbounds [[CF]]* [[P]], i32 0, i32 1
  // CHECK-NEXT: store float 1.000000e+00, float* [[T0]]
  // CHECK-NEXT: store float 0.000000e+00, float* [[T1]]
  __c11_atomic_init(fp, 1.0f);

  // CHECK-NEXT: [[T0:%.*]] = getelementptr inbounds [[CF]]* [[X]], i32 0, i32 0
  // CHECK-NEXT: [[T1:%.*]] = getelementptr inbounds [[CF]]* [[X]], i32 0, i32 1
  // CHECK-NEXT: store float 2.000000e+00, float* [[T0]]
  // CHECK-NEXT: store float 0.000000e+00, float* [[T1]]
  _Atomic(_Complex float) x = 2.0f;

  // CHECK-NEXT: [[T0:%.*]] = load [[CF]]** [[FP]]
  // CHECK-NEXT: [[T1:%.*]] = bitcast [[CF]]* [[T0]] to i8*
  // CHECK-NEXT: [[T2:%.*]] = bitcast [[CF]]* [[TMP0]] to i8*
  // CHECK-NEXT: call arm_aapcscc void @__atomic_load(i32 8, i8* [[T1]], i8* [[T2]], i32 5)
  // CHECK-NEXT: [[T0:%.*]] = getelementptr inbounds [[CF]]* [[TMP0]], i32 0, i32 0
  // CHECK-NEXT: [[R:%.*]] = load float* [[T0]]
  // CHECK-NEXT: [[T0:%.*]] = getelementptr inbounds [[CF]]* [[TMP0]], i32 0, i32 1
  // CHECK-NEXT: [[I:%.*]] = load float* [[T0]]
  // CHECK-NEXT: [[T0:%.*]] = getelementptr inbounds [[CF]]* [[F]], i32 0, i32 0
  // CHECK-NEXT: [[T1:%.*]] = getelementptr inbounds [[CF]]* [[F]], i32 0, i32 1
  // CHECK-NEXT: store float [[R]], float* [[T0]]
  // CHECK-NEXT: store float [[I]], float* [[T1]]
  _Complex float f = *fp;

  // CHECK-NEXT: [[T0:%.*]] = getelementptr inbounds [[CF]]* [[F]], i32 0, i32 0
  // CHECK-NEXT: [[R:%.*]] = load float* [[T0]]
  // CHECK-NEXT: [[T0:%.*]] = getelementptr inbounds [[CF]]* [[F]], i32 0, i32 1
  // CHECK-NEXT: [[I:%.*]] = load float* [[T0]]
  // CHECK-NEXT: [[DEST:%.*]] = load [[CF]]** [[FP]], align 4
  // CHECK-NEXT: [[T0:%.*]] = getelementptr inbounds [[CF]]* [[TMP1]], i32 0, i32 0
  // CHECK-NEXT: [[T1:%.*]] = getelementptr inbounds [[CF]]* [[TMP1]], i32 0, i32 1
  // CHECK-NEXT: store float [[R]], float* [[T0]]
  // CHECK-NEXT: store float [[I]], float* [[T1]]
  // CHECK-NEXT: [[T0:%.*]] = bitcast [[CF]]* [[DEST]] to i8*
  // CHECK-NEXT: [[T1:%.*]] = bitcast [[CF]]* [[TMP1]] to i8*
  // CHECK-NEXT: call arm_aapcscc void @__atomic_store(i32 8, i8* [[T0]], i8* [[T1]], i32 5)
  *fp = f;

  // CHECK-NEXT: ret void
}

// Struct of four shorts.  The expected IR passes size 8 for it, so the atomic
// form needs no padding.
typedef struct { short x, y, z, w; } S;
// Initialisation, load and store of an atomic S through a pointer.  The
// expected load writes straight into the local f, so only one temporary is
// expected (for the store).
// CHECK: define arm_aapcscc void @testStruct([[S:.*]]*
void testStruct(_Atomic(S) *fp) {
  // NOTE(review): the pattern for the F slot re-captures S instead of reusing
  // the capture made on the define line -- confirm this is intended.
  // CHECK: [[FP:%.*]] = alloca [[S]]*, align 4
  // CHECK-NEXT: [[X:%.*]] = alloca [[S]], align 8
  // CHECK-NEXT: [[F:%.*]] = alloca [[S:%.*]], align 2
  // CHECK-NEXT: [[TMP0:%.*]] = alloca [[S]], align 8
  // CHECK-NEXT: store [[S]]*

  // Member-by-member initialisation of *fp from the compound literal.
  // CHECK-NEXT: [[P:%.*]] = load [[S]]** [[FP]]
  // CHECK-NEXT: [[T0:%.*]] = getelementptr inbounds [[S]]* [[P]], i32 0, i32 0
  // CHECK-NEXT: store i16 1, i16* [[T0]], align 2
  // CHECK-NEXT: [[T0:%.*]] = getelementptr inbounds [[S]]* [[P]], i32 0, i32 1
  // CHECK-NEXT: store i16 2, i16* [[T0]], align 2
  // CHECK-NEXT: [[T0:%.*]] = getelementptr inbounds [[S]]* [[P]], i32 0, i32 2
  // CHECK-NEXT: store i16 3, i16* [[T0]], align 2
  // CHECK-NEXT: [[T0:%.*]] = getelementptr inbounds [[S]]* [[P]], i32 0, i32 3
  // CHECK-NEXT: store i16 4, i16* [[T0]], align 2
  __c11_atomic_init(fp, (S){1,2,3,4});

  // CHECK-NEXT: [[T0:%.*]] = getelementptr inbounds [[S]]* [[X]], i32 0, i32 0
  // CHECK-NEXT: store i16 1, i16* [[T0]], align 2
  // CHECK-NEXT: [[T0:%.*]] = getelementptr inbounds [[S]]* [[X]], i32 0, i32 1
  // CHECK-NEXT: store i16 2, i16* [[T0]], align 2
  // CHECK-NEXT: [[T0:%.*]] = getelementptr inbounds [[S]]* [[X]], i32 0, i32 2
  // CHECK-NEXT: store i16 3, i16* [[T0]], align 2
  // CHECK-NEXT: [[T0:%.*]] = getelementptr inbounds [[S]]* [[X]], i32 0, i32 3
  // CHECK-NEXT: store i16 4, i16* [[T0]], align 2
  _Atomic(S) x = (S){1,2,3,4};

  // CHECK-NEXT: [[T0:%.*]] = load [[S]]** [[FP]]
  // CHECK-NEXT: [[T1:%.*]] = bitcast [[S]]* [[T0]] to i8*
  // CHECK-NEXT: [[T2:%.*]] = bitcast [[S]]* [[F]] to i8*
  // CHECK-NEXT: call arm_aapcscc void @__atomic_load(i32 8, i8* [[T1]], i8* [[T2]], i32 5)
  S f = *fp;

  // The store is expected to memcpy f into the temporary first, then pass
  // that temporary to __atomic_store.
  // CHECK-NEXT: [[T0:%.*]] = load [[S]]** [[FP]]
  // CHECK-NEXT: [[T1:%.*]] = bitcast [[S]]* [[TMP0]] to i8*
  // CHECK-NEXT: [[T2:%.*]] = bitcast [[S]]* [[F]] to i8*
  // CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[T1]], i8* [[T2]], i32 8, i32 2, i1 false)
  // CHECK-NEXT: [[T3:%.*]] = bitcast [[S]]* [[T0]] to i8*
  // CHECK-NEXT: [[T4:%.*]] = bitcast [[S]]* [[TMP0]] to i8*
  // CHECK-NEXT: call arm_aapcscc void @__atomic_store(i32 8, i8* [[T3]], i8* [[T4]], i32 5)
  *fp = f;

  // CHECK-NEXT: ret void
}

// Struct of three shorts.  In the expected IR its atomic form is the padded
// type { %struct.PS, [2 x i8] }: 8 bytes are zero-filled and passed to the
// atomic calls, while only 6 bytes of payload are copied in or out.
typedef struct { short x, y, z; } PS;
// Initialisation, load, store, and a load-then-member-read of an atomic PS
// through a pointer.  APS captures the padded atomic type and PS the plain
// struct type.
// CHECK: define arm_aapcscc void @testPromotedStruct([[APS:.*]]*
void testPromotedStruct(_Atomic(PS) *fp) {
  // CHECK: [[FP:%.*]] = alloca [[APS]]*, align 4
  // CHECK-NEXT: [[X:%.*]] = alloca [[APS]], align 8
  // CHECK-NEXT: [[F:%.*]] = alloca [[PS:%.*]], align 2
  // CHECK-NEXT: [[TMP0:%.*]] = alloca [[APS]], align 8
  // CHECK-NEXT: [[TMP1:%.*]] = alloca [[APS]], align 8
  // CHECK-NEXT: [[A:%.*]] = alloca i32, align 4
  // CHECK-NEXT: [[TMP2:%.*]] = alloca %struct.PS, align 2
  // CHECK-NEXT: [[TMP3:%.*]] = alloca [[APS]], align 8
  // CHECK-NEXT: store [[APS]]*

  // Zero the whole padded object, then store the three members.
  // NOTE(review): this memset is matched with an i64 length, whereas the two
  // later memsets in this function are matched with an i32 length -- confirm
  // against the compiler's actual output.
  // CHECK-NEXT: [[P:%.*]] = load [[APS]]** [[FP]]
  // CHECK-NEXT: [[T0:%.*]] = bitcast [[APS]]* [[P]] to i8*
  // CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* [[T0]], i8 0, i64 8, i32 8, i1 false)
  // CHECK-NEXT: [[T0:%.*]] = getelementptr inbounds [[APS]]* [[P]], i32 0, i32 0
  // CHECK-NEXT: [[T1:%.*]] = getelementptr inbounds [[PS]]* [[T0]], i32 0, i32 0
  // CHECK-NEXT: store i16 1, i16* [[T1]], align 2
  // CHECK-NEXT: [[T1:%.*]] = getelementptr inbounds [[PS]]* [[T0]], i32 0, i32 1
  // CHECK-NEXT: store i16 2, i16* [[T1]], align 2
  // CHECK-NEXT: [[T1:%.*]] = getelementptr inbounds [[PS]]* [[T0]], i32 0, i32 2
  // CHECK-NEXT: store i16 3, i16* [[T1]], align 2
  __c11_atomic_init(fp, (PS){1,2,3});

  // CHECK-NEXT: [[T0:%.*]] = bitcast [[APS]]* [[X]] to i8*
  // CHECK-NEXT: call void @llvm.memset.p0i8.i32(i8* [[T0]], i8 0, i32 8, i32 8, i1 false)
  // CHECK-NEXT: [[T0:%.*]] = getelementptr inbounds [[APS]]* [[X]], i32 0, i32 0
  // CHECK-NEXT: [[T1:%.*]] = getelementptr inbounds [[PS]]* [[T0]], i32 0, i32 0
  // CHECK-NEXT: store i16 1, i16* [[T1]], align 2
  // CHECK-NEXT: [[T1:%.*]] = getelementptr inbounds [[PS]]* [[T0]], i32 0, i32 1
  // CHECK-NEXT: store i16 2, i16* [[T1]], align 2
  // CHECK-NEXT: [[T1:%.*]] = getelementptr inbounds [[PS]]* [[T0]], i32 0, i32 2
  // CHECK-NEXT: store i16 3, i16* [[T1]], align 2
  _Atomic(PS) x = (PS){1,2,3};

  // Atomic load of all 8 bytes into a temporary, then a 6-byte copy of the
  // payload into f.
  // CHECK-NEXT: [[T0:%.*]] = load [[APS]]** [[FP]]
  // CHECK-NEXT: [[T1:%.*]] = bitcast [[APS]]* [[T0]] to i8*
  // CHECK-NEXT: [[T2:%.*]] = bitcast [[APS]]* [[TMP0]] to i8*
  // CHECK-NEXT: call arm_aapcscc void @__atomic_load(i32 8, i8* [[T1]], i8* [[T2]], i32 5)
  // CHECK-NEXT: [[T0:%.*]] = getelementptr inbounds [[APS]]* [[TMP0]], i32 0, i32 0
  // CHECK-NEXT: [[T1:%.*]] = bitcast [[PS]]* [[F]] to i8*
  // CHECK-NEXT: [[T2:%.*]] = bitcast [[PS]]* [[T0]] to i8*
  // CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[T1]], i8* [[T2]], i32 6, i32 2, i1 false)
  PS f = *fp;

  // Zero a padded temporary, copy the 6 payload bytes of f into it, then
  // atomically store all 8 bytes.
  // CHECK-NEXT: [[T0:%.*]] = load [[APS]]** [[FP]]
  // CHECK-NEXT: [[T1:%.*]] = bitcast { %struct.PS, [2 x i8] }* [[TMP1]] to i8*
  // CHECK-NEXT: call void @llvm.memset.p0i8.i32(i8* [[T1]], i8 0, i32 8, i32 8, i1 false)
  // CHECK-NEXT: [[T1:%.*]] = getelementptr inbounds [[APS]]* [[TMP1]], i32 0, i32 0
  // CHECK-NEXT: [[T2:%.*]] = bitcast [[PS]]* [[T1]] to i8*
  // CHECK-NEXT: [[T3:%.*]] = bitcast [[PS]]* [[F]] to i8*
  // CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[T2]], i8* [[T3]], i32 6, i32 2, i1 false)
  // CHECK-NEXT: [[T4:%.*]] = bitcast [[APS]]* [[T0]] to i8*
  // CHECK-NEXT: [[T5:%.*]] = bitcast [[APS]]* [[TMP1]] to i8*
  // CHECK-NEXT: call arm_aapcscc void @__atomic_store(i32 8, i8* [[T4]], i8* [[T5]], i32 5)
  *fp = f;

  // Reading a member through a cast of the atomic object: atomic load into a
  // padded temporary, 6-byte copy into a plain PS temporary, then a
  // sign-extending read of its first member into a.
  // CHECK-NEXT: [[T0:%.*]] = load [[APS]]** [[FP]], align 4
  // CHECK-NEXT: [[T1:%.*]] = bitcast [[APS]]* [[T0]] to i8*
  // CHECK-NEXT: [[T2:%.*]] = bitcast [[APS]]* [[TMP3]] to i8*
  // CHECK-NEXT: call arm_aapcscc void @__atomic_load(i32 8, i8* [[T1]], i8* [[T2]], i32 5)
  // CHECK-NEXT: [[T0:%.*]] = getelementptr inbounds [[APS]]* [[TMP3]], i32 0, i32 0
  // CHECK-NEXT: [[T1:%.*]] = bitcast %struct.PS* [[TMP2]] to i8*
  // CHECK-NEXT: [[T2:%.*]] = bitcast %struct.PS* [[T0]] to i8*
  // CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[T1]], i8* [[T2]], i32 6, i32 2, i1 false)
  // CHECK-NEXT: [[T0:%.*]] = getelementptr inbounds %struct.PS* [[TMP2]], i32 0, i32 0
  // CHECK-NEXT: [[T1:%.*]] = load i16* [[T0]], align 2
  // CHECK-NEXT: [[T2:%.*]] = sext i16 [[T1]] to i32
  // CHECK-NEXT: store i32 [[T2]], i32* [[A]], align 4
  int a = ((PS)*fp).x;

  // CHECK-NEXT: ret void
}

// Only the definition line of the next function is matched; its body is not
// verified (see the FIXME below).
// CHECK: define arm_aapcscc void @testPromotedStructOps([[APS:.*]]*

// FIXME: none of these look right, but we can leave the "test" here
// to make sure they at least don't crash.
// Calls the __c11_atomic_load, _store, _exchange and both compare-exchange
// builtins on the padded struct, passing 5 as every memory-order argument
// (presumably seq_cst -- TODO confirm).  The locals b and v shadow the
// file-scope globals of the same names.
void testPromotedStructOps(_Atomic(PS) *p) {
  PS a = __c11_atomic_load(p, 5);
  __c11_atomic_store(p, a, 5);
  PS b = __c11_atomic_exchange(p, a, 5);
  _Bool v = __c11_atomic_compare_exchange_strong(p, &b, a, 5, 5);
  v = __c11_atomic_compare_exchange_weak(p, &b, a, 5, 5);
}