1 #ifndef Py_ATOMIC_H 2 #define Py_ATOMIC_H 3 #ifdef Py_BUILD_CORE 4 5 #include "dynamic_annotations.h" 6 7 #include "pyconfig.h" 8 9 #if defined(HAVE_STD_ATOMIC) 10 #include <stdatomic.h> 11 #endif 12 13 14 #if defined(_MSC_VER) 15 #include <intrin.h> 16 #include <immintrin.h> 17 #endif 18 19 /* This is modeled after the atomics interface from C1x, according to 20 * the draft at 21 * http://www.open-std.org/JTC1/SC22/wg14/www/docs/n1425.pdf. 22 * Operations and types are named the same except with a _Py_ prefix 23 * and have the same semantics. 24 * 25 * Beware, the implementations here are deep magic. 26 */ 27 28 #if defined(HAVE_STD_ATOMIC) 29 30 typedef enum _Py_memory_order { 31 _Py_memory_order_relaxed = memory_order_relaxed, 32 _Py_memory_order_acquire = memory_order_acquire, 33 _Py_memory_order_release = memory_order_release, 34 _Py_memory_order_acq_rel = memory_order_acq_rel, 35 _Py_memory_order_seq_cst = memory_order_seq_cst 36 } _Py_memory_order; 37 38 typedef struct _Py_atomic_address { 39 atomic_uintptr_t _value; 40 } _Py_atomic_address; 41 42 typedef struct _Py_atomic_int { 43 atomic_int _value; 44 } _Py_atomic_int; 45 46 #define _Py_atomic_signal_fence(/*memory_order*/ ORDER) \ 47 atomic_signal_fence(ORDER) 48 49 #define _Py_atomic_thread_fence(/*memory_order*/ ORDER) \ 50 atomic_thread_fence(ORDER) 51 52 #define _Py_atomic_store_explicit(ATOMIC_VAL, NEW_VAL, ORDER) \ 53 atomic_store_explicit(&(ATOMIC_VAL)->_value, NEW_VAL, ORDER) 54 55 #define _Py_atomic_load_explicit(ATOMIC_VAL, ORDER) \ 56 atomic_load_explicit(&(ATOMIC_VAL)->_value, ORDER) 57 58 /* Use builtin atomic operations in GCC >= 4.7 */ 59 #elif defined(HAVE_BUILTIN_ATOMIC) 60 61 typedef enum _Py_memory_order { 62 _Py_memory_order_relaxed = __ATOMIC_RELAXED, 63 _Py_memory_order_acquire = __ATOMIC_ACQUIRE, 64 _Py_memory_order_release = __ATOMIC_RELEASE, 65 _Py_memory_order_acq_rel = __ATOMIC_ACQ_REL, 66 _Py_memory_order_seq_cst = __ATOMIC_SEQ_CST 67 } _Py_memory_order; 68 69 typedef struct _Py_atomic_address { 70 uintptr_t _value; 71 } _Py_atomic_address; 72 73 typedef struct _Py_atomic_int { 74 int _value; 75 } _Py_atomic_int; 76 77 #define _Py_atomic_signal_fence(/*memory_order*/ ORDER) \ 78 __atomic_signal_fence(ORDER) 79 80 #define _Py_atomic_thread_fence(/*memory_order*/ ORDER) \ 81 __atomic_thread_fence(ORDER) 82 83 #define _Py_atomic_store_explicit(ATOMIC_VAL, NEW_VAL, ORDER) \ 84 (assert((ORDER) == __ATOMIC_RELAXED \ 85 || (ORDER) == __ATOMIC_SEQ_CST \ 86 || (ORDER) == __ATOMIC_RELEASE), \ 87 __atomic_store_n(&(ATOMIC_VAL)->_value, NEW_VAL, ORDER)) 88 89 #define _Py_atomic_load_explicit(ATOMIC_VAL, ORDER) \ 90 (assert((ORDER) == __ATOMIC_RELAXED \ 91 || (ORDER) == __ATOMIC_SEQ_CST \ 92 || (ORDER) == __ATOMIC_ACQUIRE \ 93 || (ORDER) == __ATOMIC_CONSUME), \ 94 __atomic_load_n(&(ATOMIC_VAL)->_value, ORDER)) 95 96 /* Only support GCC (for expression statements) and x86 (for simple 97 * atomic semantics) and MSVC x86/x64/ARM */ 98 #elif defined(__GNUC__) && (defined(__i386__) || defined(__amd64)) 99 typedef enum _Py_memory_order { 100 _Py_memory_order_relaxed, 101 _Py_memory_order_acquire, 102 _Py_memory_order_release, 103 _Py_memory_order_acq_rel, 104 _Py_memory_order_seq_cst 105 } _Py_memory_order; 106 107 typedef struct _Py_atomic_address { 108 uintptr_t _value; 109 } _Py_atomic_address; 110 111 typedef struct _Py_atomic_int { 112 int _value; 113 } _Py_atomic_int; 114 115 116 static __inline__ void 117 _Py_atomic_signal_fence(_Py_memory_order order) 118 { 119 if (order != _Py_memory_order_relaxed) 120 __asm__ volatile("":::"memory"); 121 } 122 123 static __inline__ void 124 _Py_atomic_thread_fence(_Py_memory_order order) 125 { 126 if (order != _Py_memory_order_relaxed) 127 __asm__ volatile("mfence":::"memory"); 128 } 129 130 /* Tell the race checker about this operation's effects. */ 131 static __inline__ void 132 _Py_ANNOTATE_MEMORY_ORDER(const volatile void *address, _Py_memory_order order) 133 { 134 (void)address; /* shut up -Wunused-parameter */ 135 switch(order) { 136 case _Py_memory_order_release: 137 case _Py_memory_order_acq_rel: 138 case _Py_memory_order_seq_cst: 139 _Py_ANNOTATE_HAPPENS_BEFORE(address); 140 break; 141 case _Py_memory_order_relaxed: 142 case _Py_memory_order_acquire: 143 break; 144 } 145 switch(order) { 146 case _Py_memory_order_acquire: 147 case _Py_memory_order_acq_rel: 148 case _Py_memory_order_seq_cst: 149 _Py_ANNOTATE_HAPPENS_AFTER(address); 150 break; 151 case _Py_memory_order_relaxed: 152 case _Py_memory_order_release: 153 break; 154 } 155 } 156 157 #define _Py_atomic_store_explicit(ATOMIC_VAL, NEW_VAL, ORDER) \ 158 __extension__ ({ \ 159 __typeof__(ATOMIC_VAL) atomic_val = ATOMIC_VAL; \ 160 __typeof__(atomic_val->_value) new_val = NEW_VAL;\ 161 volatile __typeof__(new_val) *volatile_data = &atomic_val->_value; \ 162 _Py_memory_order order = ORDER; \ 163 _Py_ANNOTATE_MEMORY_ORDER(atomic_val, order); \ 164 \ 165 /* Perform the operation. */ \ 166 _Py_ANNOTATE_IGNORE_WRITES_BEGIN(); \ 167 switch(order) { \ 168 case _Py_memory_order_release: \ 169 _Py_atomic_signal_fence(_Py_memory_order_release); \ 170 /* fallthrough */ \ 171 case _Py_memory_order_relaxed: \ 172 *volatile_data = new_val; \ 173 break; \ 174 \ 175 case _Py_memory_order_acquire: \ 176 case _Py_memory_order_acq_rel: \ 177 case _Py_memory_order_seq_cst: \ 178 __asm__ volatile("xchg %0, %1" \ 179 : "+r"(new_val) \ 180 : "m"(atomic_val->_value) \ 181 : "memory"); \ 182 break; \ 183 } \ 184 _Py_ANNOTATE_IGNORE_WRITES_END(); \ 185 }) 186 187 #define _Py_atomic_load_explicit(ATOMIC_VAL, ORDER) \ 188 __extension__ ({ \ 189 __typeof__(ATOMIC_VAL) atomic_val = ATOMIC_VAL; \ 190 __typeof__(atomic_val->_value) result; \ 191 volatile __typeof__(result) *volatile_data = &atomic_val->_value; \ 192 _Py_memory_order order = ORDER; \ 193 _Py_ANNOTATE_MEMORY_ORDER(atomic_val, order); \ 194 \ 195 /* Perform the operation. */ \ 196 _Py_ANNOTATE_IGNORE_READS_BEGIN(); \ 197 switch(order) { \ 198 case _Py_memory_order_release: \ 199 case _Py_memory_order_acq_rel: \ 200 case _Py_memory_order_seq_cst: \ 201 /* Loads on x86 are not releases by default, so need a */ \ 202 /* thread fence. */ \ 203 _Py_atomic_thread_fence(_Py_memory_order_release); \ 204 break; \ 205 default: \ 206 /* No fence */ \ 207 break; \ 208 } \ 209 result = *volatile_data; \ 210 switch(order) { \ 211 case _Py_memory_order_acquire: \ 212 case _Py_memory_order_acq_rel: \ 213 case _Py_memory_order_seq_cst: \ 214 /* Loads on x86 are automatically acquire operations so */ \ 215 /* can get by with just a compiler fence. */ \ 216 _Py_atomic_signal_fence(_Py_memory_order_acquire); \ 217 break; \ 218 default: \ 219 /* No fence */ \ 220 break; \ 221 } \ 222 _Py_ANNOTATE_IGNORE_READS_END(); \ 223 result; \ 224 }) 225 226 #elif defined(_MSC_VER) 227 /* _Interlocked* functions provide a full memory barrier and are therefore 228 enough for acq_rel and seq_cst. If the HLE variants aren't available 229 in hardware they will fall back to a full memory barrier as well. 230 231 This might affect performance but likely only in some very specific and 232 hard to meassure scenario. 233 */ 234 #if defined(_M_IX86) || defined(_M_X64) 235 typedef enum _Py_memory_order { 236 _Py_memory_order_relaxed, 237 _Py_memory_order_acquire, 238 _Py_memory_order_release, 239 _Py_memory_order_acq_rel, 240 _Py_memory_order_seq_cst 241 } _Py_memory_order; 242 243 typedef struct _Py_atomic_address { 244 volatile uintptr_t _value; 245 } _Py_atomic_address; 246 247 typedef struct _Py_atomic_int { 248 volatile int _value; 249 } _Py_atomic_int; 250 251 252 #if defined(_M_X64) 253 #define _Py_atomic_store_64bit(ATOMIC_VAL, NEW_VAL, ORDER) \ 254 switch (ORDER) { \ 255 case _Py_memory_order_acquire: \ 256 _InterlockedExchange64_HLEAcquire((__int64 volatile*)ATOMIC_VAL, (__int64)NEW_VAL); \ 257 break; \ 258 case _Py_memory_order_release: \ 259 _InterlockedExchange64_HLERelease((__int64 volatile*)ATOMIC_VAL, (__int64)NEW_VAL); \ 260 break; \ 261 default: \ 262 _InterlockedExchange64((__int64 volatile*)ATOMIC_VAL, (__int64)NEW_VAL); \ 263 break; \ 264 } 265 #else 266 #define _Py_atomic_store_64bit(ATOMIC_VAL, NEW_VAL, ORDER) ((void)0); 267 #endif 268 269 #define _Py_atomic_store_32bit(ATOMIC_VAL, NEW_VAL, ORDER) \ 270 switch (ORDER) { \ 271 case _Py_memory_order_acquire: \ 272 _InterlockedExchange_HLEAcquire((volatile long*)ATOMIC_VAL, (int)NEW_VAL); \ 273 break; \ 274 case _Py_memory_order_release: \ 275 _InterlockedExchange_HLERelease((volatile long*)ATOMIC_VAL, (int)NEW_VAL); \ 276 break; \ 277 default: \ 278 _InterlockedExchange((volatile long*)ATOMIC_VAL, (int)NEW_VAL); \ 279 break; \ 280 } 281 282 #if defined(_M_X64) 283 /* This has to be an intptr_t for now. 284 gil_created() uses -1 as a sentinel value, if this returns 285 a uintptr_t it will do an unsigned compare and crash 286 */ 287 inline intptr_t _Py_atomic_load_64bit(volatile uintptr_t* value, int order) { 288 __int64 old; 289 switch (order) { 290 case _Py_memory_order_acquire: 291 { 292 do { 293 old = *value; 294 } while(_InterlockedCompareExchange64_HLEAcquire((volatile __int64*)value, old, old) != old); 295 break; 296 } 297 case _Py_memory_order_release: 298 { 299 do { 300 old = *value; 301 } while(_InterlockedCompareExchange64_HLERelease((volatile __int64*)value, old, old) != old); 302 break; 303 } 304 case _Py_memory_order_relaxed: 305 old = *value; 306 break; 307 default: 308 { 309 do { 310 old = *value; 311 } while(_InterlockedCompareExchange64((volatile __int64*)value, old, old) != old); 312 break; 313 } 314 } 315 return old; 316 } 317 318 #else 319 #define _Py_atomic_load_64bit(ATOMIC_VAL, ORDER) *ATOMIC_VAL 320 #endif 321 322 inline int _Py_atomic_load_32bit(volatile int* value, int order) { 323 long old; 324 switch (order) { 325 case _Py_memory_order_acquire: 326 { 327 do { 328 old = *value; 329 } while(_InterlockedCompareExchange_HLEAcquire((volatile long*)value, old, old) != old); 330 break; 331 } 332 case _Py_memory_order_release: 333 { 334 do { 335 old = *value; 336 } while(_InterlockedCompareExchange_HLERelease((volatile long*)value, old, old) != old); 337 break; 338 } 339 case _Py_memory_order_relaxed: 340 old = *value; 341 break; 342 default: 343 { 344 do { 345 old = *value; 346 } while(_InterlockedCompareExchange((volatile long*)value, old, old) != old); 347 break; 348 } 349 } 350 return old; 351 } 352 353 #define _Py_atomic_store_explicit(ATOMIC_VAL, NEW_VAL, ORDER) \ 354 if (sizeof(*ATOMIC_VAL._value) == 8) { \ 355 _Py_atomic_store_64bit((volatile long long*)ATOMIC_VAL._value, NEW_VAL, ORDER) } else { \ 356 _Py_atomic_store_32bit((volatile long*)ATOMIC_VAL._value, NEW_VAL, ORDER) } 357 358 #define _Py_atomic_load_explicit(ATOMIC_VAL, ORDER) \ 359 ( \ 360 sizeof(*(ATOMIC_VAL._value)) == 8 ? \ 361 _Py_atomic_load_64bit((volatile long long*)ATOMIC_VAL._value, ORDER) : \ 362 _Py_atomic_load_32bit((volatile long*)ATOMIC_VAL._value, ORDER) \ 363 ) 364 #elif defined(_M_ARM) || defined(_M_ARM64) 365 typedef enum _Py_memory_order { 366 _Py_memory_order_relaxed, 367 _Py_memory_order_acquire, 368 _Py_memory_order_release, 369 _Py_memory_order_acq_rel, 370 _Py_memory_order_seq_cst 371 } _Py_memory_order; 372 373 typedef struct _Py_atomic_address { 374 volatile uintptr_t _value; 375 } _Py_atomic_address; 376 377 typedef struct _Py_atomic_int { 378 volatile int _value; 379 } _Py_atomic_int; 380 381 382 #if defined(_M_ARM64) 383 #define _Py_atomic_store_64bit(ATOMIC_VAL, NEW_VAL, ORDER) \ 384 switch (ORDER) { \ 385 case _Py_memory_order_acquire: \ 386 _InterlockedExchange64_acq((__int64 volatile*)ATOMIC_VAL, (__int64)NEW_VAL); \ 387 break; \ 388 case _Py_memory_order_release: \ 389 _InterlockedExchange64_rel((__int64 volatile*)ATOMIC_VAL, (__int64)NEW_VAL); \ 390 break; \ 391 default: \ 392 _InterlockedExchange64((__int64 volatile*)ATOMIC_VAL, (__int64)NEW_VAL); \ 393 break; \ 394 } 395 #else 396 #define _Py_atomic_store_64bit(ATOMIC_VAL, NEW_VAL, ORDER) ((void)0); 397 #endif 398 399 #define _Py_atomic_store_32bit(ATOMIC_VAL, NEW_VAL, ORDER) \ 400 switch (ORDER) { \ 401 case _Py_memory_order_acquire: \ 402 _InterlockedExchange_acq((volatile long*)ATOMIC_VAL, (int)NEW_VAL); \ 403 break; \ 404 case _Py_memory_order_release: \ 405 _InterlockedExchange_rel((volatile long*)ATOMIC_VAL, (int)NEW_VAL); \ 406 break; \ 407 default: \ 408 _InterlockedExchange((volatile long*)ATOMIC_VAL, (int)NEW_VAL); \ 409 break; \ 410 } 411 412 #if defined(_M_ARM64) 413 /* This has to be an intptr_t for now. 414 gil_created() uses -1 as a sentinel value, if this returns 415 a uintptr_t it will do an unsigned compare and crash 416 */ 417 inline intptr_t _Py_atomic_load_64bit(volatile uintptr_t* value, int order) { 418 uintptr_t old; 419 switch (order) { 420 case _Py_memory_order_acquire: 421 { 422 do { 423 old = *value; 424 } while(_InterlockedCompareExchange64_acq(value, old, old) != old); 425 break; 426 } 427 case _Py_memory_order_release: 428 { 429 do { 430 old = *value; 431 } while(_InterlockedCompareExchange64_rel(value, old, old) != old); 432 break; 433 } 434 case _Py_memory_order_relaxed: 435 old = *value; 436 break; 437 default: 438 { 439 do { 440 old = *value; 441 } while(_InterlockedCompareExchange64(value, old, old) != old); 442 break; 443 } 444 } 445 return old; 446 } 447 448 #else 449 #define _Py_atomic_load_64bit(ATOMIC_VAL, ORDER) *ATOMIC_VAL 450 #endif 451 452 inline int _Py_atomic_load_32bit(volatile int* value, int order) { 453 int old; 454 switch (order) { 455 case _Py_memory_order_acquire: 456 { 457 do { 458 old = *value; 459 } while(_InterlockedCompareExchange_acq(value, old, old) != old); 460 break; 461 } 462 case _Py_memory_order_release: 463 { 464 do { 465 old = *value; 466 } while(_InterlockedCompareExchange_rel(value, old, old) != old); 467 break; 468 } 469 case _Py_memory_order_relaxed: 470 old = *value; 471 break; 472 default: 473 { 474 do { 475 old = *value; 476 } while(_InterlockedCompareExchange(value, old, old) != old); 477 break; 478 } 479 } 480 return old; 481 } 482 483 #define _Py_atomic_store_explicit(ATOMIC_VAL, NEW_VAL, ORDER) \ 484 if (sizeof(*ATOMIC_VAL._value) == 8) { \ 485 _Py_atomic_store_64bit(ATOMIC_VAL._value, NEW_VAL, ORDER) } else { \ 486 _Py_atomic_store_32bit(ATOMIC_VAL._value, NEW_VAL, ORDER) } 487 488 #define _Py_atomic_load_explicit(ATOMIC_VAL, ORDER) \ 489 ( \ 490 sizeof(*(ATOMIC_VAL._value)) == 8 ? \ 491 _Py_atomic_load_64bit(ATOMIC_VAL._value, ORDER) : \ 492 _Py_atomic_load_32bit(ATOMIC_VAL._value, ORDER) \ 493 ) 494 #endif 495 #else /* !gcc x86 !_msc_ver */ 496 typedef enum _Py_memory_order { 497 _Py_memory_order_relaxed, 498 _Py_memory_order_acquire, 499 _Py_memory_order_release, 500 _Py_memory_order_acq_rel, 501 _Py_memory_order_seq_cst 502 } _Py_memory_order; 503 504 typedef struct _Py_atomic_address { 505 uintptr_t _value; 506 } _Py_atomic_address; 507 508 typedef struct _Py_atomic_int { 509 int _value; 510 } _Py_atomic_int; 511 /* Fall back to other compilers and processors by assuming that simple 512 volatile accesses are atomic. This is false, so people should port 513 this. */ 514 #define _Py_atomic_signal_fence(/*memory_order*/ ORDER) ((void)0) 515 #define _Py_atomic_thread_fence(/*memory_order*/ ORDER) ((void)0) 516 #define _Py_atomic_store_explicit(ATOMIC_VAL, NEW_VAL, ORDER) \ 517 ((ATOMIC_VAL)->_value = NEW_VAL) 518 #define _Py_atomic_load_explicit(ATOMIC_VAL, ORDER) \ 519 ((ATOMIC_VAL)->_value) 520 #endif 521 522 /* Standardized shortcuts. */ 523 #define _Py_atomic_store(ATOMIC_VAL, NEW_VAL) \ 524 _Py_atomic_store_explicit(ATOMIC_VAL, NEW_VAL, _Py_memory_order_seq_cst) 525 #define _Py_atomic_load(ATOMIC_VAL) \ 526 _Py_atomic_load_explicit(ATOMIC_VAL, _Py_memory_order_seq_cst) 527 528 /* Python-local extensions */ 529 530 #define _Py_atomic_store_relaxed(ATOMIC_VAL, NEW_VAL) \ 531 _Py_atomic_store_explicit(ATOMIC_VAL, NEW_VAL, _Py_memory_order_relaxed) 532 #define _Py_atomic_load_relaxed(ATOMIC_VAL) \ 533 _Py_atomic_load_explicit(ATOMIC_VAL, _Py_memory_order_relaxed) 534 #endif /* Py_BUILD_CORE */ 535 #endif /* Py_ATOMIC_H */ 536