; RUN: llc < %s -mtriple=armv7-apple-ios -verify-machineinstrs | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-ARMV7
; RUN: llc < %s -mtriple=thumbv7-apple-ios -verify-machineinstrs | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-T2
; RUN: llc < %s -mtriple=thumbv6-apple-ios -verify-machineinstrs | FileCheck %s --check-prefix=CHECK-T1
; RUN: llc < %s -mtriple=thumbv6-apple-ios -verify-machineinstrs -mcpu=cortex-m0 | FileCheck %s --check-prefix=CHECK-T1
; RUN: llc < %s -mtriple=thumbv7--none-eabi -thread-model single -verify-machineinstrs | FileCheck %s --check-prefix=CHECK-BAREMETAL

; Code generation test for atomicrmw, cmpxchg, atomic load/store and fence.
;
; Check prefixes selected by the invocations above:
;   "CHECK"            armv7 and thumbv7 on iOS; the expectations are inline
;                      ldrex/strex sequences and dmb barriers.
;   "CHECK-ARMV7",
;   "CHECK-T2"         ISA-specific block layout of the first cmpxchg test.
;   "CHECK-T1"         thumbv6 on iOS (default CPU and cortex-m0); the
;                      expectations are calls to ___sync_* helpers.
;   "CHECK-BAREMETAL"  thumbv7 none-eabi with the single thread model; the
;                      expectations are the plain operation with no __sync
;                      call and no dmb.

target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-a:0:32-n32-S64"

; Issues every 32-bit atomicrmw operation covered by this test (add, sub, and,
; or, xor, min, max, umin, umax) with monotonic ordering on stack slots, storing
; each returned value into %old.
define void @func(i32 %argc, i8** %argv) nounwind {
entry:
  %argc.addr = alloca i32                         ; <i32*> [#uses=1]
  %argv.addr = alloca i8**                        ; <i8***> [#uses=1]
  %val1 = alloca i32                              ; <i32*> [#uses=2]
  %val2 = alloca i32                              ; <i32*> [#uses=15]
  %andt = alloca i32                              ; <i32*> [#uses=2]
  %ort = alloca i32                               ; <i32*> [#uses=2]
  %xort = alloca i32                              ; <i32*> [#uses=2]
  %old = alloca i32                               ; <i32*> [#uses=18]
  %temp = alloca i32                              ; <i32*> [#uses=2]
  store i32 %argc, i32* %argc.addr
  store i8** %argv, i8*** %argv.addr
  store i32 0, i32* %val1
  store i32 31, i32* %val2
  store i32 3855, i32* %andt
  store i32 3855, i32* %ort
  store i32 3855, i32* %xort
  store i32 4, i32* %temp
  %tmp = load i32, i32* %temp
  ; CHECK: ldrex
  ; CHECK: add
  ; CHECK: strex
  ; CHECK-T1: bl ___sync_fetch_and_add_4
  ; CHECK-BAREMETAL: add
  ; CHECK-BAREMETAL-NOT: __sync
  %0 = atomicrmw add i32* %val1, i32 %tmp monotonic
  store i32 %0, i32* %old
  ; CHECK: ldrex
  ; CHECK: sub
  ; CHECK: strex
  ; CHECK-T1: bl ___sync_fetch_and_sub_4
  ; CHECK-BAREMETAL: sub
  ; CHECK-BAREMETAL-NOT: __sync
  %1 = atomicrmw sub i32* %val2, i32 30 monotonic
  store i32 %1, i32* %old
  ; CHECK: ldrex
  ; CHECK: add
  ; CHECK: strex
  ; CHECK-T1: bl ___sync_fetch_and_add_4
  ; CHECK-BAREMETAL: add
  ; CHECK-BAREMETAL-NOT: __sync
  %2 = atomicrmw add i32* %val2, i32 1 monotonic
  store i32 %2, i32* %old
  ; CHECK: ldrex
  ; CHECK: sub
  ; CHECK: strex
  ; CHECK-T1: bl ___sync_fetch_and_sub_4
  ; CHECK-BAREMETAL: sub
  ; CHECK-BAREMETAL-NOT: __sync
  %3 = atomicrmw sub i32* %val2, i32 1 monotonic
  store i32 %3, i32* %old
  ; CHECK: ldrex
  ; CHECK: and
  ; CHECK: strex
  ; CHECK-T1: bl ___sync_fetch_and_and_4
  ; CHECK-BAREMETAL: and
  ; CHECK-BAREMETAL-NOT: __sync
  %4 = atomicrmw and i32* %andt, i32 4080 monotonic
  store i32 %4, i32* %old
  ; CHECK: ldrex
  ; CHECK: or
  ; CHECK: strex
  ; CHECK-T1: bl ___sync_fetch_and_or_4
  ; CHECK-BAREMETAL: or
  ; CHECK-BAREMETAL-NOT: __sync
  %5 = atomicrmw or i32* %ort, i32 4080 monotonic
  store i32 %5, i32* %old
  ; CHECK: ldrex
  ; CHECK: eor
  ; CHECK: strex
  ; CHECK-T1: bl ___sync_fetch_and_xor_4
  ; CHECK-BAREMETAL: eor
  ; CHECK-BAREMETAL-NOT: __sync
  %6 = atomicrmw xor i32* %xort, i32 4080 monotonic
  store i32 %6, i32* %old
  ; CHECK: ldrex
  ; CHECK: cmp
  ; CHECK: strex
  ; CHECK-T1: bl ___sync_fetch_and_min_4
  ; CHECK-BAREMETAL: cmp
  ; CHECK-BAREMETAL-NOT: __sync
  %7 = atomicrmw min i32* %val2, i32 16 monotonic
  store i32 %7, i32* %old
  %neg = sub i32 0, 1
  ; CHECK: ldrex
  ; CHECK: cmp
  ; CHECK: strex
  ; CHECK-T1: bl ___sync_fetch_and_min_4
  ; CHECK-BAREMETAL: cmp
  ; CHECK-BAREMETAL-NOT: __sync
  %8 = atomicrmw min i32* %val2, i32 %neg monotonic
  store i32 %8, i32* %old
  ; CHECK: ldrex
  ; CHECK: cmp
  ; CHECK: strex
  ; CHECK-T1: bl ___sync_fetch_and_max_4
  ; CHECK-BAREMETAL: cmp
  ; CHECK-BAREMETAL-NOT: __sync
  %9 = atomicrmw max i32* %val2, i32 1 monotonic
  store i32 %9, i32* %old
  ; CHECK: ldrex
  ; CHECK: cmp
  ; CHECK: strex
  ; CHECK-T1: bl ___sync_fetch_and_max_4
  ; CHECK-BAREMETAL: cmp
  ; CHECK-BAREMETAL-NOT: __sync
  %10 = atomicrmw max i32* %val2, i32 0 monotonic
  store i32 %10, i32* %old
  ; CHECK: ldrex
  ; CHECK: cmp
  ; CHECK: strex
  ; CHECK-T1: bl ___sync_fetch_and_umin_4
  ; CHECK-BAREMETAL: cmp
  ; CHECK-BAREMETAL-NOT: __sync
  %11 = atomicrmw umin i32* %val2, i32 16 monotonic
  store i32 %11, i32* %old
  %uneg = sub i32 0, 1
  ; CHECK: ldrex
  ; CHECK: cmp
  ; CHECK: strex
  ; CHECK-T1: bl ___sync_fetch_and_umin_4
  ; CHECK-BAREMETAL: cmp
  ; CHECK-BAREMETAL-NOT: __sync
  %12 = atomicrmw umin i32* %val2, i32 %uneg monotonic
  store i32 %12, i32* %old
  ; CHECK: ldrex
  ; CHECK: cmp
  ; CHECK: strex
  ; CHECK-T1: bl ___sync_fetch_and_umax_4
  ; CHECK-BAREMETAL: cmp
  ; CHECK-BAREMETAL-NOT: __sync
  %13 = atomicrmw umax i32* %val2, i32 1 monotonic
  store i32 %13, i32* %old
  ; CHECK: ldrex
  ; CHECK: cmp
  ; CHECK: strex
  ; CHECK-T1: bl ___sync_fetch_and_umax_4
  ; CHECK-BAREMETAL: cmp
  ; CHECK-BAREMETAL-NOT: __sync
  %14 = atomicrmw umax i32* %val2, i32 0 monotonic
  store i32 %14, i32* %old

  ret void
}

; 16-bit variant: monotonic umin/umax on an i16 stack slot.
define void @func2() nounwind {
entry:
  %val = alloca i16
  %old = alloca i16
  store i16 31, i16* %val
  ; CHECK: ldrex
  ; CHECK: cmp
  ; CHECK: strex
  ; CHECK-T1: bl ___sync_fetch_and_umin_2
  ; CHECK-BAREMETAL: cmp
  ; CHECK-BAREMETAL-NOT: __sync
  %0 = atomicrmw umin i16* %val, i16 16 monotonic
  store i16 %0, i16* %old
  %uneg = sub i16 0, 1
  ; CHECK: ldrex
  ; CHECK: cmp
  ; CHECK: strex
  ; CHECK-T1: bl ___sync_fetch_and_umin_2
  ; CHECK-BAREMETAL: cmp
  ; CHECK-BAREMETAL-NOT: __sync
  %1 = atomicrmw umin i16* %val, i16 %uneg monotonic
  store i16 %1, i16* %old
  ; CHECK: ldrex
  ; CHECK: cmp
  ; CHECK: strex
  ; CHECK-T1: bl ___sync_fetch_and_umax_2
  ; CHECK-BAREMETAL: cmp
  ; CHECK-BAREMETAL-NOT: __sync
  %2 = atomicrmw umax i16* %val, i16 1 monotonic
  store i16 %2, i16* %old
  ; CHECK: ldrex
  ; CHECK: cmp
  ; CHECK: strex
  ; CHECK-T1: bl ___sync_fetch_and_umax_2
  ; CHECK-BAREMETAL: cmp
  ; CHECK-BAREMETAL-NOT: __sync
  %3 = atomicrmw umax i16* %val, i16 0 monotonic
  store i16 %3, i16* %old
  ret void
}

; 8-bit variant: monotonic umin/umax on an i8 stack slot.
define void @func3() nounwind {
entry:
  %val = alloca i8
  %old = alloca i8
  store i8 31, i8* %val
  ; CHECK: ldrex
  ; CHECK: cmp
  ; CHECK: strex
  ; CHECK-T1: bl ___sync_fetch_and_umin_1
  ; CHECK-BAREMETAL: cmp
  ; CHECK-BAREMETAL-NOT: __sync
  %0 = atomicrmw umin i8* %val, i8 16 monotonic
  store i8 %0, i8* %old
  %uneg = sub i8 0, 1
  ; CHECK: ldrex
  ; CHECK: cmp
  ; CHECK: strex
  ; CHECK-T1: bl ___sync_fetch_and_umin_1
  ; CHECK-BAREMETAL: cmp
  ; CHECK-BAREMETAL-NOT: __sync
  %1 = atomicrmw umin i8* %val, i8 %uneg monotonic
  store i8 %1, i8* %old
  ; CHECK: ldrex
  ; CHECK: cmp
  ; CHECK: strex
  ; CHECK-T1: bl ___sync_fetch_and_umax_1
  ; CHECK-BAREMETAL: cmp
  ; CHECK-BAREMETAL-NOT: __sync
  %2 = atomicrmw umax i8* %val, i8 1 monotonic
  store i8 %2, i8* %old
  ; CHECK: ldrex
  ; CHECK: cmp
  ; CHECK: strex
  ; CHECK-T1: bl ___sync_fetch_and_umax_1
  ; CHECK-BAREMETAL: cmp
  ; CHECK-BAREMETAL-NOT: __sync
  %3 = atomicrmw umax i8* %val, i8 0 monotonic
  store i8 %3, i8* %old
  ret void
}

; CHECK: func4
; This function should not need to use callee-saved registers.
; rdar://problem/12203728
; CHECK-NOT: r4
define i32 @func4(i32* %p) nounwind optsize ssp {
entry:
  %0 = atomicrmw add i32* %p, i32 1 monotonic
  ret i32 %0
}

; cmpxchg with seq_cst success ordering and monotonic failure ordering; the
; expected block layout is spelled out separately for ARM and Thumb2.
define i32 @test_cmpxchg_fail_order(i32 *%addr, i32 %desired, i32 %new) {
; CHECK-LABEL: test_cmpxchg_fail_order:

  %pair = cmpxchg i32* %addr, i32 %desired, i32 %new seq_cst monotonic
  %oldval = extractvalue { i32, i1 } %pair, 0
; CHECK-ARMV7: ldrex [[OLDVAL:r[0-9]+]], [r[[ADDR:[0-9]+]]]
; CHECK-ARMV7: cmp [[OLDVAL]], r1
; CHECK-ARMV7: bne [[FAIL_BB:\.?LBB[0-9]+_[0-9]+]]
; CHECK-ARMV7: dmb ish
; CHECK-ARMV7: [[LOOP_BB:\.?LBB.*]]:
; CHECK-ARMV7: strex [[SUCCESS:r[0-9]+]], r2, [r[[ADDR]]]
; CHECK-ARMV7: cmp [[SUCCESS]], #0
; CHECK-ARMV7: beq [[SUCCESS_BB:\.?LBB.*]]
; CHECK-ARMV7: ldrex [[OLDVAL]], [r[[ADDR]]]
; CHECK-ARMV7: cmp [[OLDVAL]], r1
; CHECK-ARMV7: beq [[LOOP_BB]]
; CHECK-ARMV7: [[FAIL_BB]]:
; CHECK-ARMV7: clrex
; CHECK-ARMV7: bx lr
; CHECK-ARMV7: [[SUCCESS_BB]]:
; CHECK-ARMV7: dmb ish
; CHECK-ARMV7: bx lr

; CHECK-T2: ldrex [[OLDVAL:r[0-9]+]], [r[[ADDR:[0-9]+]]]
; CHECK-T2: cmp [[OLDVAL]], r1
; CHECK-T2: bne [[FAIL_BB:\.?LBB.*]]
; CHECK-T2: dmb ish
; CHECK-T2: [[LOOP_BB:\.?LBB.*]]:
; CHECK-T2: strex [[SUCCESS:r[0-9]+]], r2, [r[[ADDR]]]
; CHECK-T2: cmp [[SUCCESS]], #0
; CHECK-T2: dmbeq ish
; CHECK-T2: bxeq lr
; CHECK-T2: ldrex [[OLDVAL]], [r[[ADDR]]]
; CHECK-T2: cmp [[OLDVAL]], r1
; CHECK-T2: beq [[LOOP_BB]]
; CHECK-T2: clrex

  ret i32 %oldval
}

; cmpxchg with acquire ordering on both success and failure; no barrier is
; expected before the loop, and one is expected after the join block.
define i32 @test_cmpxchg_fail_order1(i32 *%addr, i32 %desired, i32 %new) {
; CHECK-LABEL: test_cmpxchg_fail_order1:

  %pair = cmpxchg i32* %addr, i32 %desired, i32 %new acquire acquire
  %oldval = extractvalue { i32, i1 } %pair, 0
; CHECK-NOT: dmb ish
; CHECK: [[LOOP_BB:\.?LBB[0-9]+_1]]:
; CHECK: ldrex [[OLDVAL:r[0-9]+]], [r[[ADDR:[0-9]+]]]
; CHECK: cmp [[OLDVAL]], r1
; CHECK: bne [[FAIL_BB:\.?LBB[0-9]+_[0-9]+]]
; CHECK: strex [[SUCCESS:r[0-9]+]], r2, [r[[ADDR]]]
; CHECK: cmp [[SUCCESS]], #0
; CHECK: bne [[LOOP_BB]]
; CHECK: b [[END_BB:\.?LBB[0-9]+_[0-9]+]]
; CHECK: [[FAIL_BB]]:
; CHECK-NEXT: clrex
; CHECK-NEXT: [[END_BB]]:
; CHECK: dmb ish
; CHECK: bx lr

  ret i32 %oldval
}

; Two acquire loads followed by an add of the loaded values.
define i32 @load_load_add_acquire(i32* %mem1, i32* %mem2) nounwind {
; CHECK-LABEL: load_load_add_acquire
  %val1 = load atomic i32, i32* %mem1 acquire, align 4
  %val2 = load atomic i32, i32* %mem2 acquire, align 4
  %tmp = add i32 %val1, %val2

; CHECK: ldr {{r[0-9]}}, [r0]
; CHECK: dmb
; CHECK: ldr {{r[0-9]}}, [r1]
; CHECK: dmb
; CHECK: add r0,

; CHECK-T1: ___sync_val_compare_and_swap_4
; CHECK-T1: ___sync_val_compare_and_swap_4

; CHECK-BAREMETAL: ldr {{r[0-9]}}, [r0]
; CHECK-BAREMETAL-NOT: dmb
; CHECK-BAREMETAL: ldr {{r[0-9]}}, [r1]
; CHECK-BAREMETAL-NOT: dmb
; CHECK-BAREMETAL: add r0,

  ret i32 %tmp
}

; Two release stores to distinct addresses.
define void @store_store_release(i32* %mem1, i32 %val1, i32* %mem2, i32 %val2) {
; CHECK-LABEL: store_store_release
  store atomic i32 %val1, i32* %mem1 release, align 4
  store atomic i32 %val2, i32* %mem2 release, align 4

; CHECK: dmb
; CHECK: str r1, [r0]
; CHECK: dmb
; CHECK: str r3, [r2]

; CHECK-T1: ___sync_lock_test_and_set
; CHECK-T1: ___sync_lock_test_and_set

; CHECK-BAREMETAL-NOT: dmb
; CHECK-BAREMETAL: str r1, [r0]
; CHECK-BAREMETAL-NOT: dmb
; CHECK-BAREMETAL: str r3, [r2]

  ret void
}

; Monotonic load, seq_cst fence, then monotonic store of the loaded value.
define void @load_fence_store_monotonic(i32* %mem1, i32* %mem2) {
; CHECK-LABEL: load_fence_store_monotonic
  %val = load atomic i32, i32* %mem1 monotonic, align 4
  fence seq_cst
  store atomic i32 %val, i32* %mem2 monotonic, align 4

; CHECK: ldr [[R0:r[0-9]]], [r0]
; CHECK: dmb
; CHECK: str [[R0]], [r1]

; CHECK-T1: ldr [[R0:r[0-9]]], [{{r[0-9]+}}]
; CHECK-T1: {{dmb|bl ___sync_synchronize}}
; CHECK-T1: str [[R0]], [{{r[0-9]+}}]

; CHECK-BAREMETAL: ldr [[R0:r[0-9]]], [r0]
; CHECK-BAREMETAL-NOT: dmb
; CHECK-BAREMETAL: str [[R0]], [r1]

  ret void
}