Home | History | Annotate | Download | only in CodeGen
      1 // RUN: %clang_cc1 %s -emit-llvm -o - -triple=armv5-unknown-freebsd -std=c11 | FileCheck %s
      2 
      3 // Test that we are generating atomicrmw instructions, rather than
      4 // compare-exchange loops for common atomic ops.  This makes a big difference
      5 // on RISC platforms, where the compare-exchange loop becomes an ll/sc pair for
      6 // the load and then another ll/sc in the loop, expanding to about 30
      7 // instructions when it should be only 4.  It has a smaller, but still
      8 // noticeable, impact on platforms like x86 and RISC-V, where there are atomic
      9 // RMW instructions.
     10 //
     11 // We currently emit cmpxchg loops for most operations on _Bools, because
     12 // they're sufficiently rare that it's not worth making sure that the semantics
     13 // are correct.
     14 
     15 struct elem;  // forward declaration so that struct ptr can hold a pointer to it
     16 
        // Single-member wrapper around a pointer to struct elem. struct elem in turn
        // embeds an _Atomic(struct ptr), so the two types refer to each other.
     17 struct ptr {
     18     struct elem *ptr;
     19 };
     20 // CHECK-DAG: %struct.ptr = type { %struct.elem* }
     21 
     22 struct elem {
            // Atomic struct ptr held by value; struct ptr itself points back at
            // struct elem, which makes the pair of types mutually referential.
     23     _Atomic(struct ptr) link;
     24 };
     25 // CHECK-DAG: %struct.elem = type { %struct.ptr }
     26 
     27 struct ptr object;  // uninitialized global of the wrapper type; the next line pins how it is emitted
     28 // CHECK-DAG: @object = common global %struct.ptr zeroinitializer
     29 
     30 // CHECK-DAG: @testStructGlobal = global {{.*}} { i16 1, i16 2, i16 3, i16 4 }
     31 // CHECK-DAG: @testPromotedStructGlobal = global {{.*}} { %{{.*}} { i16 1, i16 2, i16 3 }, [2 x i8] zeroinitializer }
     32 
     33 
     34 typedef int __attribute__((vector_size(16))) vector;  // vector of int, 16 bytes in total
     35 
        // Atomic globals. b, i, l and s are the operands of the increment, decrement
        // and compound-assignment tests that follow. p, f and v are declared here
        // but no function in the part of the file shown operates on them (the
        // locals named f in later tests shadow the global).
     36 _Atomic(_Bool) b;
     37 _Atomic(int) i;
     38 _Atomic(long long) l;
     39 _Atomic(short) s;
     40 _Atomic(char*) p;
     41 _Atomic(float) f;
     42 _Atomic(vector) v;
     43 
     44 // CHECK: testinc
     45 void testinc(void)
     46 {
          // Applies postfix and then prefix ++ to the atomic globals b, i, l and s.
          // Each FileCheck expectation describes the IR wanted for the statement
          // that immediately follows it.
     47   // Special case for postfix bool++, sets to true and returns the old value.
     48   // CHECK: atomicrmw xchg i8* @b, i8 1 seq_cst
     49   b++;
     50   // CHECK: atomicrmw add i32* @i, i32 1 seq_cst
     51   i++;
     52   // CHECK: atomicrmw add i64* @l, i64 1 seq_cst
     53   l++;
     54   // CHECK: atomicrmw add i16* @s, i16 1 seq_cst
     55   s++;
     56   // Prefix increment
     57   // Special case for bool: set to true and return true
     58   // CHECK: store atomic i8 1, i8* @b seq_cst, align 1
     59   ++b;
     60   // Currently, we have no variant of atomicrmw that returns the new value, so
     61   // we have to generate an atomic add, which returns the old value, and then a
     62   // non-atomic add.
     63   // CHECK: atomicrmw add i32* @i, i32 1 seq_cst
     64   // CHECK: add i32
     65   ++i;
     66   // CHECK: atomicrmw add i64* @l, i64 1 seq_cst
     67   // CHECK: add i64
     68   ++l;
     69   // CHECK: atomicrmw add i16* @s, i16 1 seq_cst
     70   // CHECK: add i16
     71   ++s;
     72 }
     73 // CHECK: testdec
     74 void testdec(void)
     75 {
          // Applies postfix and then prefix -- to the atomic globals b, i, l and s.
          // For the _Bool, both forms are expected to call the
          // __atomic_compare_exchange library routine rather than use atomicrmw
          // (its first argument, i32 1, is presumably the object size in bytes).
     76   // CHECK: call arm_aapcscc zeroext i1 @__atomic_compare_exchange(i32 1, i8* @b
     77   b--;
     78   // CHECK: atomicrmw sub i32* @i, i32 1 seq_cst
     79   i--;
     80   // CHECK: atomicrmw sub i64* @l, i64 1 seq_cst
     81   l--;
     82   // CHECK: atomicrmw sub i16* @s, i16 1 seq_cst
     83   s--;
          // Prefix decrement: for the integer types each atomicrmw sub is expected
          // to be followed by a plain sub of the same width.
     84   // CHECK: call arm_aapcscc zeroext i1 @__atomic_compare_exchange(i32 1, i8* @b
     85   --b;
     86   // CHECK: atomicrmw sub i32* @i, i32 1 seq_cst
     87   // CHECK: sub i32
     88   --i;
     89   // CHECK: atomicrmw sub i64* @l, i64 1 seq_cst
     90   // CHECK: sub i64
     91   --l;
     92   // CHECK: atomicrmw sub i16* @s, i16 1 seq_cst
     93   // CHECK: sub i16
     94   --s;
     95 }
     96 // CHECK: testaddeq
     97 void testaddeq(void)
     98 {
          // Compound += 42 on b, i, l and s. The expectations are grouped ahead of
          // the statements and listed in statement order: a library compare-exchange
          // call for the _Bool, then atomicrmw add for the three integer types.
     99   // CHECK: call arm_aapcscc zeroext i1 @__atomic_compare_exchange(i32 1, i8* @b
    100   // CHECK: atomicrmw add i32* @i, i32 42 seq_cst
    101   // CHECK: atomicrmw add i64* @l, i64 42 seq_cst
    102   // CHECK: atomicrmw add i16* @s, i16 42 seq_cst
    103   b += 42;
    104   i += 42;
    105   l += 42;
    106   s += 42;
    107 }
    108 // CHECK: testsubeq
    109 void testsubeq(void)
    110 {
          // Compound -= 42 on b, i, l and s. The expectations are grouped ahead of
          // the statements and listed in statement order: a library compare-exchange
          // call for the _Bool, then atomicrmw sub for the three integer types.
    111   // CHECK: call arm_aapcscc zeroext i1 @__atomic_compare_exchange(i32 1, i8* @b
    112   // CHECK: atomicrmw sub i32* @i, i32 42 seq_cst
    113   // CHECK: atomicrmw sub i64* @l, i64 42 seq_cst
    114   // CHECK: atomicrmw sub i16* @s, i16 42 seq_cst
    115   b -= 42;
    116   i -= 42;
    117   l -= 42;
    118   s -= 42;
    119 }
    120 // CHECK: testxoreq
    121 void testxoreq(void)
    122 {
          // Compound ^= 42 on b, i, l and s. The expectations are grouped ahead of
          // the statements and listed in statement order: a library compare-exchange
          // call for the _Bool, then atomicrmw xor for the three integer types.
    123   // CHECK: call arm_aapcscc zeroext i1 @__atomic_compare_exchange(i32 1, i8* @b
    124   // CHECK: atomicrmw xor i32* @i, i32 42 seq_cst
    125   // CHECK: atomicrmw xor i64* @l, i64 42 seq_cst
    126   // CHECK: atomicrmw xor i16* @s, i16 42 seq_cst
    127   b ^= 42;
    128   i ^= 42;
    129   l ^= 42;
    130   s ^= 42;
    131 }
    132 // CHECK: testoreq
    133 void testoreq(void)
    134 {
          // Compound |= 42 on b, i, l and s. The expectations are grouped ahead of
          // the statements and listed in statement order: a library compare-exchange
          // call for the _Bool, then atomicrmw or for the three integer types.
    135   // CHECK: call arm_aapcscc zeroext i1 @__atomic_compare_exchange(i32 1, i8* @b
    136   // CHECK: atomicrmw or i32* @i, i32 42 seq_cst
    137   // CHECK: atomicrmw or i64* @l, i64 42 seq_cst
    138   // CHECK: atomicrmw or i16* @s, i16 42 seq_cst
    139   b |= 42;
    140   i |= 42;
    141   l |= 42;
    142   s |= 42;
    143 }
    144 // CHECK: testandeq
    145 void testandeq(void)
    146 {
          // Compound &= 42 on b, i, l and s. The expectations are grouped ahead of
          // the statements and listed in statement order: a library compare-exchange
          // call for the _Bool, then atomicrmw and for the three integer types.
    147   // CHECK: call arm_aapcscc zeroext i1 @__atomic_compare_exchange(i32 1, i8* @b
    148   // CHECK: atomicrmw and i32* @i, i32 42 seq_cst
    149   // CHECK: atomicrmw and i64* @l, i64 42 seq_cst
    150   // CHECK: atomicrmw and i16* @s, i16 42 seq_cst
    151   b &= 42;
    152   i &= 42;
    153   l &= 42;
    154   s &= 42;
    155 }
    156 
    157 // CHECK-LABEL: define arm_aapcscc void @testFloat(float*
    158 void testFloat(_Atomic(float) *fp) {
        // Walks an _Atomic(float) through four operations, in order:
        // __c11_atomic_init through the pointer, initialization of a local atomic,
        // an atomic load and an atomic store. The first expectation group pins the
        // order of the allocas (parameter slot, x, f, then two temporaries).
        // Both initializations are expected to be ordinary stores; the load and the
        // store are expected to go through the __atomic_load / __atomic_store
        // library routines with a size argument of 4.
        // NOTE(review): the trailing i32 5 in those calls is presumably the
        // seq_cst memory order -- confirm.
    159 // CHECK:      [[FP:%.*]] = alloca float*
    160 // CHECK-NEXT: [[X:%.*]] = alloca float
    161 // CHECK-NEXT: [[F:%.*]] = alloca float
    162 // CHECK-NEXT: [[TMP0:%.*]] = alloca float
    163 // CHECK-NEXT: [[TMP1:%.*]] = alloca float
    164 // CHECK-NEXT: store float* {{%.*}}, float** [[FP]]
    165 
    166 // CHECK-NEXT: [[T0:%.*]] = load float*, float** [[FP]]
    167 // CHECK-NEXT: store float 1.000000e+00, float* [[T0]], align 4
    168   __c11_atomic_init(fp, 1.0f);
    169 
    170 // CHECK-NEXT: store float 2.000000e+00, float* [[X]], align 4
    171   _Atomic(float) x = 2.0f;
    172 
        // Atomic load: the result is expected to land in TMP0 and then be copied to f.
    173 // CHECK-NEXT: [[T0:%.*]] = load float*, float** [[FP]]
    174 // CHECK-NEXT: [[T1:%.*]] = bitcast float* [[T0]] to i8*
    175 // CHECK-NEXT: [[T2:%.*]] = bitcast float* [[TMP0]] to i8*
    176 // CHECK-NEXT: call arm_aapcscc void @__atomic_load(i32 4, i8* [[T1]], i8* [[T2]], i32 5)
    177 // CHECK-NEXT: [[T3:%.*]] = load float, float* [[TMP0]], align 4
    178 // CHECK-NEXT: store float [[T3]], float* [[F]]
    179   float f = *fp;
    180 
        // Atomic store: f is expected to be spilled to TMP1, whose address is passed on.
    181 // CHECK-NEXT: [[T0:%.*]] = load float, float* [[F]], align 4
    182 // CHECK-NEXT: [[T1:%.*]] = load float*, float** [[FP]], align 4
    183 // CHECK-NEXT: store float [[T0]], float* [[TMP1]], align 4
    184 // CHECK-NEXT: [[T2:%.*]] = bitcast float* [[T1]] to i8*
    185 // CHECK-NEXT: [[T3:%.*]] = bitcast float* [[TMP1]] to i8*
    186 // CHECK-NEXT: call arm_aapcscc void @__atomic_store(i32 4, i8* [[T2]], i8* [[T3]], i32 5)
    187   *fp = f;
    188 
    189 // CHECK-NEXT: ret void
    190 }
    191 
    192 // CHECK: define arm_aapcscc void @testComplexFloat([[CF:{ float, float }]]*
    193 void testComplexFloat(_Atomic(_Complex float) *fp) {
        // Same four-step sequence as testFloat (init through the pointer, local
        // atomic initialization, atomic load, atomic store), applied to
        // _Atomic(_Complex float). CF is the { float, float } IR type captured by
        // the function-header expectation. The atomic-typed slots (x and the two
        // temporaries) are expected with align 8, the plain f with align 4, and the
        // library calls with a size argument of 8.
    194 // CHECK:      [[FP:%.*]] = alloca [[CF]]*, align 4
    195 // CHECK-NEXT: [[X:%.*]] = alloca [[CF]], align 8
    196 // CHECK-NEXT: [[F:%.*]] = alloca [[CF]], align 4
    197 // CHECK-NEXT: [[TMP0:%.*]] = alloca [[CF]], align 8
    198 // CHECK-NEXT: [[TMP1:%.*]] = alloca [[CF]], align 8
    199 // CHECK-NEXT: store [[CF]]*
    200 
        // Initializing from the real constant 1.0f: real part 1.0, imaginary part 0.0.
    201 // CHECK-NEXT: [[P:%.*]] = load [[CF]]*, [[CF]]** [[FP]]
    202 // CHECK-NEXT: [[T0:%.*]] = getelementptr inbounds [[CF]], [[CF]]* [[P]], i32 0, i32 0
    203 // CHECK-NEXT: [[T1:%.*]] = getelementptr inbounds [[CF]], [[CF]]* [[P]], i32 0, i32 1
    204 // CHECK-NEXT: store float 1.000000e+00, float* [[T0]]
    205 // CHECK-NEXT: store float 0.000000e+00, float* [[T1]]
    206   __c11_atomic_init(fp, 1.0f);
    207 
    208 // CHECK-NEXT: [[T0:%.*]] = getelementptr inbounds [[CF]], [[CF]]* [[X]], i32 0, i32 0
    209 // CHECK-NEXT: [[T1:%.*]] = getelementptr inbounds [[CF]], [[CF]]* [[X]], i32 0, i32 1
    210 // CHECK-NEXT: store float 2.000000e+00, float* [[T0]]
    211 // CHECK-NEXT: store float 0.000000e+00, float* [[T1]]
    212   _Atomic(_Complex float) x = 2.0f;
    213 
        // Atomic load into TMP0, then both components are copied into f.
    214 // CHECK-NEXT: [[T0:%.*]] = load [[CF]]*, [[CF]]** [[FP]]
    215 // CHECK-NEXT: [[T1:%.*]] = bitcast [[CF]]* [[T0]] to i8*
    216 // CHECK-NEXT: [[T2:%.*]] = bitcast [[CF]]* [[TMP0]] to i8*
    217 // CHECK-NEXT: call arm_aapcscc void @__atomic_load(i32 8, i8* [[T1]], i8* [[T2]], i32 5)
    218 // CHECK-NEXT: [[T0:%.*]] = getelementptr inbounds [[CF]], [[CF]]* [[TMP0]], i32 0, i32 0
    219 // CHECK-NEXT: [[R:%.*]] = load float, float* [[T0]]
    220 // CHECK-NEXT: [[T0:%.*]] = getelementptr inbounds [[CF]], [[CF]]* [[TMP0]], i32 0, i32 1
    221 // CHECK-NEXT: [[I:%.*]] = load float, float* [[T0]]
    222 // CHECK-NEXT: [[T0:%.*]] = getelementptr inbounds [[CF]], [[CF]]* [[F]], i32 0, i32 0
    223 // CHECK-NEXT: [[T1:%.*]] = getelementptr inbounds [[CF]], [[CF]]* [[F]], i32 0, i32 1
    224 // CHECK-NEXT: store float [[R]], float* [[T0]]
    225 // CHECK-NEXT: store float [[I]], float* [[T1]]
    226   _Complex float f = *fp;
    227 
        // Atomic store: both components of f are copied into TMP1, which is then
        // handed to the library routine.
    228 // CHECK-NEXT: [[T0:%.*]] = getelementptr inbounds [[CF]], [[CF]]* [[F]], i32 0, i32 0
    229 // CHECK-NEXT: [[R:%.*]] = load float, float* [[T0]]
    230 // CHECK-NEXT: [[T0:%.*]] = getelementptr inbounds [[CF]], [[CF]]* [[F]], i32 0, i32 1
    231 // CHECK-NEXT: [[I:%.*]] = load float, float* [[T0]]
    232 // CHECK-NEXT: [[DEST:%.*]] = load [[CF]]*, [[CF]]** [[FP]], align 4
    233 // CHECK-NEXT: [[T0:%.*]] = getelementptr inbounds [[CF]], [[CF]]* [[TMP1]], i32 0, i32 0
    234 // CHECK-NEXT: [[T1:%.*]] = getelementptr inbounds [[CF]], [[CF]]* [[TMP1]], i32 0, i32 1
    235 // CHECK-NEXT: store float [[R]], float* [[T0]]
    236 // CHECK-NEXT: store float [[I]], float* [[T1]]
    237 // CHECK-NEXT: [[T0:%.*]] = bitcast [[CF]]* [[DEST]] to i8*
    238 // CHECK-NEXT: [[T1:%.*]] = bitcast [[CF]]* [[TMP1]] to i8*
    239 // CHECK-NEXT: call arm_aapcscc void @__atomic_store(i32 8, i8* [[T0]], i8* [[T1]], i32 5)
    240   *fp = f;
    241 
    242 // CHECK-NEXT: ret void
    243 }
    244 
    245 typedef struct { short x, y, z, w; } S;  // four shorts; the tests below treat it as 8 bytes
        // Atomic global initialized from a compound literal. The emitted initializer
        // is pinned by the matching -DAG expectation near the top of the file.
    246 _Atomic S testStructGlobal = (S){1, 2, 3, 4};
    247 // CHECK: define arm_aapcscc void @testStruct([[S:.*]]*
    248 void testStruct(_Atomic(S) *fp) {
        // Same four-step sequence as testFloat, applied to the four-short struct S.
        // The atomic-typed slots (x and TMP0) are expected with align 8 while the
        // plain local f gets align 2. Only one temporary is expected here: the
        // atomic load writes straight into f, and only the store copies through
        // TMP0.
    249 // CHECK:      [[FP:%.*]] = alloca [[S]]*, align 4
    250 // CHECK-NEXT: [[X:%.*]] = alloca [[S]], align 8
    251 // CHECK-NEXT: [[F:%.*]] = alloca [[S:%.*]], align 2
    252 // CHECK-NEXT: [[TMP0:%.*]] = alloca [[S]], align 8
    253 // CHECK-NEXT: store [[S]]*
    254 
        // Field-by-field initialization; the expected store alignments (8, 2, 4, 2)
        // follow each field's offset within the 8-aligned object.
    255 // CHECK-NEXT: [[P:%.*]] = load [[S]]*, [[S]]** [[FP]]
    256 // CHECK-NEXT: [[T0:%.*]] = getelementptr inbounds [[S]], [[S]]* [[P]], i32 0, i32 0
    257 // CHECK-NEXT: store i16 1, i16* [[T0]], align 8
    258 // CHECK-NEXT: [[T0:%.*]] = getelementptr inbounds [[S]], [[S]]* [[P]], i32 0, i32 1
    259 // CHECK-NEXT: store i16 2, i16* [[T0]], align 2
    260 // CHECK-NEXT: [[T0:%.*]] = getelementptr inbounds [[S]], [[S]]* [[P]], i32 0, i32 2
    261 // CHECK-NEXT: store i16 3, i16* [[T0]], align 4
    262 // CHECK-NEXT: [[T0:%.*]] = getelementptr inbounds [[S]], [[S]]* [[P]], i32 0, i32 3
    263 // CHECK-NEXT: store i16 4, i16* [[T0]], align 2
    264   __c11_atomic_init(fp, (S){1,2,3,4});
    265 
    266 // CHECK-NEXT: [[T0:%.*]] = getelementptr inbounds [[S]], [[S]]* [[X]], i32 0, i32 0
    267 // CHECK-NEXT: store i16 1, i16* [[T0]], align 8
    268 // CHECK-NEXT: [[T0:%.*]] = getelementptr inbounds [[S]], [[S]]* [[X]], i32 0, i32 1
    269 // CHECK-NEXT: store i16 2, i16* [[T0]], align 2
    270 // CHECK-NEXT: [[T0:%.*]] = getelementptr inbounds [[S]], [[S]]* [[X]], i32 0, i32 2
    271 // CHECK-NEXT: store i16 3, i16* [[T0]], align 4
    272 // CHECK-NEXT: [[T0:%.*]] = getelementptr inbounds [[S]], [[S]]* [[X]], i32 0, i32 3
    273 // CHECK-NEXT: store i16 4, i16* [[T0]], align 2
    274   _Atomic(S) x = (S){1,2,3,4};
    275 
        // Atomic load directly into f (no intermediate temporary is expected).
    276 // CHECK-NEXT: [[T0:%.*]] = load [[S]]*, [[S]]** [[FP]]
    277 // CHECK-NEXT: [[T1:%.*]] = bitcast [[S]]* [[T0]] to i8*
    278 // CHECK-NEXT: [[T2:%.*]] = bitcast [[S]]* [[F]] to i8*
    279 // CHECK-NEXT: call arm_aapcscc void @__atomic_load(i32 8, i8* [[T1]], i8* [[T2]], i32 5)
    280   S f = *fp;
    281 
        // Atomic store: f is first copied into TMP0 with an 8-byte memcpy.
    282 // CHECK-NEXT: [[T0:%.*]] = load [[S]]*, [[S]]** [[FP]]
    283 // CHECK-NEXT: [[T1:%.*]] = bitcast [[S]]* [[TMP0]] to i8*
    284 // CHECK-NEXT: [[T2:%.*]] = bitcast [[S]]* [[F]] to i8*
    285 // CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[T1]], i8* [[T2]], i32 8, i32 2, i1 false)
    286 // CHECK-NEXT: [[T3:%.*]] = bitcast [[S]]* [[T0]] to i8*
    287 // CHECK-NEXT: [[T4:%.*]] = bitcast [[S]]* [[TMP0]] to i8*
    288 // CHECK-NEXT: call arm_aapcscc void @__atomic_store(i32 8, i8* [[T3]], i8* [[T4]], i32 5)
    289   *fp = f;
    290 
    291 // CHECK-NEXT: ret void
    292 }
    293 
    294 typedef struct { short x, y, z; } PS;  // three shorts; its atomic form is expected to gain a [2 x i8] tail
        // Atomic global initialized from a compound literal. The emitted initializer,
        // including the zeroed two-byte tail, is pinned by the matching -DAG
        // expectation near the top of the file.
    295 _Atomic PS testPromotedStructGlobal = (PS){1, 2, 3};
    296 // CHECK: define arm_aapcscc void @testPromotedStruct([[APS:.*]]*
    297 void testPromotedStruct(_Atomic(PS) *fp) {
        // Same sequence as testStruct, applied to the three-short struct PS, plus a
        // final member read through a cast. APS is the IR type of the atomic form
        // (captured by the function-header expectation); PS is the plain struct
        // type. Expectations to note: every fresh atomic object is zeroed with an
        // 8-byte memset before its fields are written, and copies between the plain
        // and atomic representations move 6 bytes.
    298 // CHECK:      [[FP:%.*]] = alloca [[APS]]*, align 4
    299 // CHECK-NEXT: [[X:%.*]] = alloca [[APS]], align 8
    300 // CHECK-NEXT: [[F:%.*]] = alloca [[PS:%.*]], align 2
    301 // CHECK-NEXT: [[TMP0:%.*]] = alloca [[APS]], align 8
    302 // CHECK-NEXT: [[TMP1:%.*]] = alloca [[APS]], align 8
    303 // CHECK-NEXT: [[A:%.*]] = alloca i32, align 4
    304 // CHECK-NEXT: [[TMP2:%.*]] = alloca %struct.PS, align 2
    305 // CHECK-NEXT: [[TMP3:%.*]] = alloca [[APS]], align 8
    306 // CHECK-NEXT: store [[APS]]*
    307 
        // NOTE(review): this memset is expected with an i64 length, whereas the two
        // later ones in this function use i32 -- presumably they come from
        // different code paths in the compiler; confirm before "normalizing".
    308 // CHECK-NEXT: [[P:%.*]] = load [[APS]]*, [[APS]]** [[FP]]
    309 // CHECK-NEXT: [[T0:%.*]] = bitcast [[APS]]* [[P]] to i8*
    310 // CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* [[T0]], i8 0, i64 8, i32 8, i1 false)
    311 // CHECK-NEXT: [[T0:%.*]] = getelementptr inbounds [[APS]], [[APS]]* [[P]], i32 0, i32 0
    312 // CHECK-NEXT: [[T1:%.*]] = getelementptr inbounds [[PS]], [[PS]]* [[T0]], i32 0, i32 0
    313 // CHECK-NEXT: store i16 1, i16* [[T1]], align 8
    314 // CHECK-NEXT: [[T1:%.*]] = getelementptr inbounds [[PS]], [[PS]]* [[T0]], i32 0, i32 1
    315 // CHECK-NEXT: store i16 2, i16* [[T1]], align 2
    316 // CHECK-NEXT: [[T1:%.*]] = getelementptr inbounds [[PS]], [[PS]]* [[T0]], i32 0, i32 2
    317 // CHECK-NEXT: store i16 3, i16* [[T1]], align 4
    318   __c11_atomic_init(fp, (PS){1,2,3});
    319 
    320 // CHECK-NEXT: [[T0:%.*]] = bitcast [[APS]]* [[X]] to i8*
    321 // CHECK-NEXT: call void @llvm.memset.p0i8.i32(i8* [[T0]], i8 0, i32 8, i32 8, i1 false)
    322 // CHECK-NEXT: [[T0:%.*]] = getelementptr inbounds [[APS]], [[APS]]* [[X]], i32 0, i32 0
    323 // CHECK-NEXT: [[T1:%.*]] = getelementptr inbounds [[PS]], [[PS]]* [[T0]], i32 0, i32 0
    324 // CHECK-NEXT: store i16 1, i16* [[T1]], align 8
    325 // CHECK-NEXT: [[T1:%.*]] = getelementptr inbounds [[PS]], [[PS]]* [[T0]], i32 0, i32 1
    326 // CHECK-NEXT: store i16 2, i16* [[T1]], align 2
    327 // CHECK-NEXT: [[T1:%.*]] = getelementptr inbounds [[PS]], [[PS]]* [[T0]], i32 0, i32 2
    328 // CHECK-NEXT: store i16 3, i16* [[T1]], align 4
    329   _Atomic(PS) x = (PS){1,2,3};
    330 
        // Atomic load into TMP0 (atomic layout), then a 6-byte copy of its first
        // member into the plain f.
    331 // CHECK-NEXT: [[T0:%.*]] = load [[APS]]*, [[APS]]** [[FP]]
    332 // CHECK-NEXT: [[T1:%.*]] = bitcast [[APS]]* [[T0]] to i8*
    333 // CHECK-NEXT: [[T2:%.*]] = bitcast [[APS]]* [[TMP0]] to i8*
    334 // CHECK-NEXT: call arm_aapcscc void @__atomic_load(i32 8, i8* [[T1]], i8* [[T2]], i32 5)
    335 // CHECK-NEXT: [[T0:%.*]] = getelementptr inbounds [[APS]], [[APS]]* [[TMP0]], i32 0, i32 0
    336 // CHECK-NEXT: [[T1:%.*]] = bitcast [[PS]]* [[F]] to i8*
    337 // CHECK-NEXT: [[T2:%.*]] = bitcast [[PS]]* [[T0]] to i8*
    338 // CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[T1]], i8* [[T2]], i32 6, i32 2, i1 false)
    339   PS f = *fp;
    340 
        // Atomic store: TMP1 is zeroed, f is copied into its first member, and TMP1
        // is then handed to the library routine.
    341 // CHECK-NEXT: [[T0:%.*]] = load [[APS]]*, [[APS]]** [[FP]]
    342 // CHECK-NEXT: [[T1:%.*]] = bitcast { %struct.PS, [2 x i8] }* [[TMP1]] to i8*
    343 // CHECK-NEXT: call void @llvm.memset.p0i8.i32(i8* [[T1]], i8 0, i32 8, i32 8, i1 false)
    344 // CHECK-NEXT: [[T1:%.*]] = getelementptr inbounds [[APS]], [[APS]]* [[TMP1]], i32 0, i32 0
    345 // CHECK-NEXT: [[T2:%.*]] = bitcast [[PS]]* [[T1]] to i8*
    346 // CHECK-NEXT: [[T3:%.*]] = bitcast [[PS]]* [[F]] to i8*
    347 // CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[T2]], i8* [[T3]], i32 6, i32 2, i1 false)
    348 // CHECK-NEXT: [[T4:%.*]] = bitcast [[APS]]* [[T0]] to i8*
    349 // CHECK-NEXT: [[T5:%.*]] = bitcast [[APS]]* [[TMP1]] to i8*
    350 // CHECK-NEXT: call arm_aapcscc void @__atomic_store(i32 8, i8* [[T4]], i8* [[T5]], i32 5)
    351   *fp = f;
    352 
        // Member access on a cast of the atomic lvalue: atomic load into TMP3,
        // 6-byte copy into the plain temporary TMP2, then x is read and
        // sign-extended into a.
    353 // CHECK-NEXT: [[T0:%.*]] = load [[APS]]*, [[APS]]** [[FP]], align 4
    354 // CHECK-NEXT: [[T1:%.*]] = bitcast [[APS]]* [[T0]] to i8*
    355 // CHECK-NEXT: [[T2:%.*]] = bitcast [[APS]]* [[TMP3]] to i8*
    356 // CHECK-NEXT: call arm_aapcscc void @__atomic_load(i32 8, i8* [[T1]], i8* [[T2]], i32 5)
    357 // CHECK-NEXT: [[T0:%.*]] = getelementptr inbounds [[APS]], [[APS]]* [[TMP3]], i32 0, i32 0
    358 // CHECK-NEXT: [[T1:%.*]] = bitcast %struct.PS* [[TMP2]] to i8*
    359 // CHECK-NEXT: [[T2:%.*]] = bitcast %struct.PS* [[T0]] to i8*
    360 // CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[T1]], i8* [[T2]], i32 6, i32 2, i1 false)
    361 // CHECK-NEXT: [[T0:%.*]] = getelementptr inbounds %struct.PS, %struct.PS* [[TMP2]], i32 0, i32 0
    362 // CHECK-NEXT: [[T1:%.*]] = load i16, i16* [[T0]], align 2
    363 // CHECK-NEXT: [[T2:%.*]] = sext i16 [[T1]] to i32
    364 // CHECK-NEXT: store i32 [[T2]], i32* [[A]], align 4
    365   int a = ((PS)*fp).x;
    366 
    367 // CHECK-NEXT: ret void
    368 }
    369 
    370 PS test_promoted_load(_Atomic(PS) *addr) {
          // Explicit __c11_atomic_load on the padded atomic PS with ordering
          // argument 5. The expectations: the object is accessed as an i64 through
          // the sized __atomic_load_8 library call, the result is stored into an
          // 8-aligned temporary, and only 6 bytes are copied out to the sret result.
          // NOTE(review): 5 is presumably the seq_cst memory order -- confirm.
    371   // CHECK-LABEL: @test_promoted_load(%struct.PS* noalias sret %agg.result, { %struct.PS, [2 x i8] }* %addr)
    372   // CHECK:   [[ADDR_ARG:%.*]] = alloca { %struct.PS, [2 x i8] }*, align 4
    373   // CHECK:   [[ATOMIC_RES:%.*]] = alloca { %struct.PS, [2 x i8] }, align 8
    374   // CHECK:   store { %struct.PS, [2 x i8] }* %addr, { %struct.PS, [2 x i8] }** [[ADDR_ARG]], align 4
    375   // CHECK:   [[ADDR:%.*]] = load { %struct.PS, [2 x i8] }*, { %struct.PS, [2 x i8] }** [[ADDR_ARG]], align 4
    376   // CHECK:   [[ADDR64:%.*]] = bitcast { %struct.PS, [2 x i8] }* [[ADDR]] to i64*
    377   // CHECK:   [[ATOMIC_RES64:%.*]] = bitcast { %struct.PS, [2 x i8] }* [[ATOMIC_RES]] to i64*
    378   // CHECK:   [[ADDR8:%.*]] = bitcast i64* [[ADDR64]] to i8*
    379   // CHECK:   [[RES:%.*]] = call arm_aapcscc i64 @__atomic_load_8(i8* [[ADDR8]], i32 5)
    380   // CHECK:   store i64 [[RES]], i64* [[ATOMIC_RES64]], align 8
    381   // CHECK:   [[ATOMIC_RES_STRUCT:%.*]] = bitcast i64* [[ATOMIC_RES64]] to %struct.PS*
    382   // CHECK:   [[AGG_RESULT8:%.*]] = bitcast %struct.PS* %agg.result to i8*
    383   // CHECK:   [[ATOMIC_RES8:%.*]] = bitcast %struct.PS* [[ATOMIC_RES_STRUCT]] to i8*
    384   // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[AGG_RESULT8]], i8* [[ATOMIC_RES8]], i32 6, i32 2, i1 false)
    385 
    386   return __c11_atomic_load(addr, 5);
    387 }
    388 
    389 void test_promoted_store(_Atomic(PS) *addr, PS *val) {
          // Explicit __c11_atomic_store of *val into the padded atomic PS with
          // ordering argument 5. The expectations: *val is copied (6 bytes) into a
          // plain temporary, that temporary is copied (6 bytes) into an 8-aligned
          // atomic-layout temporary, and the latter is loaded as an i64 and passed
          // to the sized __atomic_store_8 library call.
          // NOTE(review): the i64 load from the 8-aligned temporary is expected
          // with "align 2" -- presumably a compiler quirk being recorded; confirm.
    390   // CHECK-LABEL: @test_promoted_store({ %struct.PS, [2 x i8] }* %addr, %struct.PS* %val)
    391   // CHECK:   [[ADDR_ARG:%.*]] = alloca { %struct.PS, [2 x i8] }*, align 4
    392   // CHECK:   [[VAL_ARG:%.*]] = alloca %struct.PS*, align 4
    393   // CHECK:   [[NONATOMIC_TMP:%.*]] = alloca %struct.PS, align 2
    394   // CHECK:   [[ATOMIC_VAL:%.*]] = alloca { %struct.PS, [2 x i8] }, align 8
    395   // CHECK:   store { %struct.PS, [2 x i8] }* %addr, { %struct.PS, [2 x i8] }** [[ADDR_ARG]], align 4
    396   // CHECK:   store %struct.PS* %val, %struct.PS** [[VAL_ARG]], align 4
    397   // CHECK:   [[ADDR:%.*]] = load { %struct.PS, [2 x i8] }*, { %struct.PS, [2 x i8] }** [[ADDR_ARG]], align 4
    398   // CHECK:   [[VAL:%.*]] = load %struct.PS*, %struct.PS** [[VAL_ARG]], align 4
    399   // CHECK:   [[NONATOMIC_TMP8:%.*]] = bitcast %struct.PS* [[NONATOMIC_TMP]] to i8*
    400   // CHECK:   [[VAL8:%.*]] = bitcast %struct.PS* [[VAL]] to i8*
    401   // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[NONATOMIC_TMP8]], i8* [[VAL8]], i32 6, i32 2, i1 false)
    402   // CHECK:   [[ADDR64:%.*]] = bitcast { %struct.PS, [2 x i8] }* [[ADDR]] to i64*
    403   // CHECK:   [[ATOMIC_VAL8:%.*]] = bitcast { %struct.PS, [2 x i8] }* [[ATOMIC_VAL]] to i8*
    404   // CHECK:   [[NONATOMIC_TMP8:%.*]] = bitcast %struct.PS* [[NONATOMIC_TMP]] to i8*
    405   // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[ATOMIC_VAL8]], i8* [[NONATOMIC_TMP8]], i64 6, i32 2, i1 false)
    406   // CHECK:   [[ATOMIC_VAL64:%.*]] = bitcast { %struct.PS, [2 x i8] }* [[ATOMIC_VAL]] to i64*
    407   // CHECK:   [[ADDR8:%.*]] = bitcast i64* [[ADDR64]] to i8*
    408   // CHECK:   [[VAL64:%.*]] = load i64, i64* [[ATOMIC_VAL64]], align 2
    409   // CHECK:   call arm_aapcscc void @__atomic_store_8(i8* [[ADDR8]], i64 [[VAL64]], i32 5)
    410   __c11_atomic_store(addr, *val, 5);
    411 }
    412 
    413 PS test_promoted_exchange(_Atomic(PS) *addr, PS *val) {
          // Explicit __c11_atomic_exchange of *val with the padded atomic PS, ordering
          // argument 5. The expectations combine the two previous tests: *val is
          // staged through a plain temporary into an atomic-layout temporary, passed
          // as an i64 to the sized __atomic_exchange_8 library call, and the i64
          // result is stored into a second 8-aligned temporary from which 6 bytes
          // are copied to the sret result.
    414   // CHECK-LABEL: @test_promoted_exchange(%struct.PS* noalias sret %agg.result, { %struct.PS, [2 x i8] }* %addr, %struct.PS* %val)
    415   // CHECK:   [[ADDR_ARG:%.*]] = alloca { %struct.PS, [2 x i8] }*, align 4
    416   // CHECK:   [[VAL_ARG:%.*]] = alloca %struct.PS*, align 4
    417   // CHECK:   [[NONATOMIC_TMP:%.*]] = alloca %struct.PS, align 2
    418   // CHECK:   [[ATOMIC_VAL:%.*]] = alloca { %struct.PS, [2 x i8] }, align 8
    419   // CHECK:   [[ATOMIC_RES:%.*]] = alloca { %struct.PS, [2 x i8] }, align 8
    420   // CHECK:   store { %struct.PS, [2 x i8] }* %addr, { %struct.PS, [2 x i8] }** [[ADDR_ARG]], align 4
    421   // CHECK:   store %struct.PS* %val, %struct.PS** [[VAL_ARG]], align 4
    422   // CHECK:   [[ADDR:%.*]] = load { %struct.PS, [2 x i8] }*, { %struct.PS, [2 x i8] }** [[ADDR_ARG]], align 4
    423   // CHECK:   [[VAL:%.*]] = load %struct.PS*, %struct.PS** [[VAL_ARG]], align 4
    424   // CHECK:   [[NONATOMIC_TMP8:%.*]] = bitcast %struct.PS* [[NONATOMIC_TMP]] to i8*
    425   // CHECK:   [[VAL8:%.*]] = bitcast %struct.PS* [[VAL]] to i8*
    426   // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[NONATOMIC_TMP8]], i8* [[VAL8]], i32 6, i32 2, i1 false)
    427   // CHECK:   [[ADDR64:%.*]] = bitcast { %struct.PS, [2 x i8] }* [[ADDR]] to i64*
    428   // CHECK:   [[ATOMIC_VAL8:%.*]] = bitcast { %struct.PS, [2 x i8] }* [[ATOMIC_VAL]] to i8*
    429   // CHECK:   [[NONATOMIC_TMP8:%.*]] = bitcast %struct.PS* [[NONATOMIC_TMP]] to i8*
    430   // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[ATOMIC_VAL8]], i8* [[NONATOMIC_TMP8]], i64 6, i32 2, i1 false)
    431   // CHECK:   [[ATOMIC_VAL64:%.*]] = bitcast { %struct.PS, [2 x i8] }* [[ATOMIC_VAL]] to i64*
    432   // CHECK:   [[ATOMIC_RES64:%.*]] = bitcast { %struct.PS, [2 x i8] }* [[ATOMIC_RES]] to i64*
    433   // CHECK:   [[ADDR8:%.*]] = bitcast i64* [[ADDR64]] to i8*
    434   // CHECK:   [[VAL64:%.*]] = load i64, i64* [[ATOMIC_VAL64]], align 2
    435   // CHECK:   [[RES:%.*]] = call arm_aapcscc i64 @__atomic_exchange_8(i8* [[ADDR8]], i64 [[VAL64]], i32 5)
    436   // CHECK:   store i64 [[RES]], i64* [[ATOMIC_RES64]], align 8
    437   // CHECK:   [[ATOMIC_RES_STRUCT:%.*]] = bitcast i64* [[ATOMIC_RES64]] to %struct.PS*
    438   // CHECK:   [[AGG_RESULT8:%.*]] = bitcast %struct.PS* %agg.result to i8*
    439   // CHECK:   [[ATOMIC_RES8:%.*]] = bitcast %struct.PS* [[ATOMIC_RES_STRUCT]] to i8*
    440   // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[AGG_RESULT8]], i8* [[ATOMIC_RES8]], i32 6, i32 2, i1 false)
    441   return __c11_atomic_exchange(addr, *val, 5);
    442 }
    443 
    444 _Bool test_promoted_cmpxchg(_Atomic(PS) *addr, PS *desired, PS *new) {
          // Explicit __c11_atomic_compare_exchange_strong on the padded atomic PS
          // with both ordering arguments set to 5. Note the argument roles at the
          // call site: the pointer named "desired" is passed in the
          // expected-value position and *new is the replacement value.
          // The expectations: *new is staged through a plain temporary into an
          // atomic-layout temporary and passed by value as an i64; *desired is
          // copied into its own atomic-layout temporary and passed by address; the
          // i1 result of the sized __atomic_compare_exchange_8 library call is
          // returned directly.
          // NOTE(review): the pattern that captures DESIRED has no space before
          // '=' and its later use has no space before 'to'. This presumably still
          // matches because the capture regex can absorb the space -- confirm
          // before tidying either line, since they have to stay consistent.
    445   // CHECK-LABEL: i1 @test_promoted_cmpxchg({ %struct.PS, [2 x i8] }* %addr, %struct.PS* %desired, %struct.PS* %new) #0 {
    446   // CHECK:   [[ADDR_ARG:%.*]] = alloca { %struct.PS, [2 x i8] }*, align 4
    447   // CHECK:   [[DESIRED_ARG:%.*]] = alloca %struct.PS*, align 4
    448   // CHECK:   [[NEW_ARG:%.*]] = alloca %struct.PS*, align 4
    449   // CHECK:   [[NONATOMIC_TMP:%.*]] = alloca %struct.PS, align 2
    450   // CHECK:   [[ATOMIC_DESIRED:%.*]] = alloca { %struct.PS, [2 x i8] }, align 8
    451   // CHECK:   [[ATOMIC_NEW:%.*]] = alloca { %struct.PS, [2 x i8] }, align 8
    452   // CHECK:   store { %struct.PS, [2 x i8] }* %addr, { %struct.PS, [2 x i8] }** [[ADDR_ARG]], align 4
    453   // CHECK:   store %struct.PS* %desired, %struct.PS** [[DESIRED_ARG]], align 4
    454   // CHECK:   store %struct.PS* %new, %struct.PS** [[NEW_ARG]], align 4
    455   // CHECK:   [[ADDR:%.*]] = load { %struct.PS, [2 x i8] }*, { %struct.PS, [2 x i8] }** [[ADDR_ARG]], align 4
    456   // CHECK:   [[DESIRED:%.*]]= load %struct.PS*, %struct.PS** [[DESIRED_ARG]], align 4
    457   // CHECK:   [[NEW:%.*]] = load %struct.PS*, %struct.PS** [[NEW_ARG]], align 4
    458   // CHECK:   [[NONATOMIC_TMP8:%.*]] = bitcast %struct.PS* [[NONATOMIC_TMP]] to i8*
    459   // CHECK:   [[NEW8:%.*]] = bitcast %struct.PS* [[NEW]] to i8*
    460   // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[NONATOMIC_TMP8]], i8* [[NEW8]], i32 6, i32 2, i1 false)
    461   // CHECK:   [[ADDR64:%.*]] = bitcast { %struct.PS, [2 x i8] }* [[ADDR]] to i64*
    462   // CHECK:   [[ATOMIC_DESIRED8:%.*]] = bitcast { %struct.PS, [2 x i8] }* [[ATOMIC_DESIRED]] to i8*
    463   // CHECK:   [[DESIRED8:%.*]] = bitcast %struct.PS* [[DESIRED]]to i8*
    464   // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[ATOMIC_DESIRED8]], i8* [[DESIRED8]], i64 6, i32 2, i1 false)
    465   // CHECK:   [[ATOMIC_DESIRED64:%.*]] = bitcast { %struct.PS, [2 x i8] }* [[ATOMIC_DESIRED]] to i64*
    466   // CHECK:   [[ATOMIC_NEW8:%.*]] = bitcast { %struct.PS, [2 x i8] }* [[ATOMIC_NEW]] to i8*
    467   // CHECK:   [[NONATOMIC_TMP8:%.*]] = bitcast %struct.PS* [[NONATOMIC_TMP]] to i8*
    468   // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[ATOMIC_NEW8]], i8* [[NONATOMIC_TMP8]], i64 6, i32 2, i1 false)
    469   // CHECK:   [[ATOMIC_NEW64:%.*]] = bitcast { %struct.PS, [2 x i8] }* [[ATOMIC_NEW]] to i64*
    470   // CHECK:   [[ADDR8:%.*]] = bitcast i64* [[ADDR64]] to i8*
    471   // CHECK:   [[ATOMIC_DESIRED8:%.*]] = bitcast i64* [[ATOMIC_DESIRED64]] to i8*
    472   // CHECK:   [[NEW64:%.*]] = load i64, i64* [[ATOMIC_NEW64]], align 2
    473   // CHECK:   [[RES:%.*]] = call arm_aapcscc zeroext i1 @__atomic_compare_exchange_8(i8* [[ADDR8]], i8* [[ATOMIC_DESIRED8]], i64 [[NEW64]], i32 5, i32 5)
    474   // CHECK:   ret i1 [[RES]]
    475   return __c11_atomic_compare_exchange_strong(addr, desired, *new, 5, 5);
    476 }
    477