#ifndef Py_ATOMIC_H
#define Py_ATOMIC_H
#ifdef Py_BUILD_CORE

#include "dynamic_annotations.h"

#include "pyconfig.h"

#if defined(HAVE_STD_ATOMIC)
#include <stdatomic.h>
#endif


#if defined(_MSC_VER)
#include <intrin.h>
#include <immintrin.h>
#endif

/* This is modeled after the atomics interface from C1x, according to
 * the draft at
 * http://www.open-std.org/JTC1/SC22/wg14/www/docs/n1425.pdf.
 * Operations and types are named the same except with a _Py_ prefix
 * and have the same semantics.
 *
 * Beware, the implementations here are deep magic.
 */
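
/* Illustrative usage sketch (not part of the header): how the wrappers below
 * are typically used.  The variable name `example_flag` is hypothetical; real
 * call sites in the core follow the same pattern with their own fields.
 *
 *     static _Py_atomic_int example_flag;
 *
 *     static void example_set(void)
 *     {
 *         // Release store: earlier writes become visible to a thread
 *         // that later observes the flag with an acquire load.
 *         _Py_atomic_store_explicit(&example_flag, 1,
 *                                   _Py_memory_order_release);
 *     }
 *
 *     static int example_get(void)
 *     {
 *         // Acquire load, pairing with the release store above.
 *         return _Py_atomic_load_explicit(&example_flag,
 *                                         _Py_memory_order_acquire);
 *     }
 */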

#if defined(HAVE_STD_ATOMIC)

typedef enum _Py_memory_order {
    _Py_memory_order_relaxed = memory_order_relaxed,
    _Py_memory_order_acquire = memory_order_acquire,
    _Py_memory_order_release = memory_order_release,
    _Py_memory_order_acq_rel = memory_order_acq_rel,
    _Py_memory_order_seq_cst = memory_order_seq_cst
} _Py_memory_order;

typedef struct _Py_atomic_address {
    atomic_uintptr_t _value;
} _Py_atomic_address;

typedef struct _Py_atomic_int {
    atomic_int _value;
} _Py_atomic_int;

#define _Py_atomic_signal_fence(/*memory_order*/ ORDER) \
    atomic_signal_fence(ORDER)

#define _Py_atomic_thread_fence(/*memory_order*/ ORDER) \
    atomic_thread_fence(ORDER)

#define _Py_atomic_store_explicit(ATOMIC_VAL, NEW_VAL, ORDER) \
    atomic_store_explicit(&(ATOMIC_VAL)->_value, NEW_VAL, ORDER)

#define _Py_atomic_load_explicit(ATOMIC_VAL, ORDER) \
    atomic_load_explicit(&(ATOMIC_VAL)->_value, ORDER)

/* Use builtin atomic operations in GCC >= 4.7 */
#elif defined(HAVE_BUILTIN_ATOMIC)

typedef enum _Py_memory_order {
    _Py_memory_order_relaxed = __ATOMIC_RELAXED,
    _Py_memory_order_acquire = __ATOMIC_ACQUIRE,
    _Py_memory_order_release = __ATOMIC_RELEASE,
    _Py_memory_order_acq_rel = __ATOMIC_ACQ_REL,
    _Py_memory_order_seq_cst = __ATOMIC_SEQ_CST
} _Py_memory_order;

typedef struct _Py_atomic_address {
    uintptr_t _value;
} _Py_atomic_address;

typedef struct _Py_atomic_int {
    int _value;
} _Py_atomic_int;

#define _Py_atomic_signal_fence(/*memory_order*/ ORDER) \
    __atomic_signal_fence(ORDER)

#define _Py_atomic_thread_fence(/*memory_order*/ ORDER) \
    __atomic_thread_fence(ORDER)

#define _Py_atomic_store_explicit(ATOMIC_VAL, NEW_VAL, ORDER) \
    (assert((ORDER) == __ATOMIC_RELAXED                       \
            || (ORDER) == __ATOMIC_SEQ_CST                    \
            || (ORDER) == __ATOMIC_RELEASE),                  \
     __atomic_store_n(&(ATOMIC_VAL)->_value, NEW_VAL, ORDER))

#define _Py_atomic_load_explicit(ATOMIC_VAL, ORDER)           \
    (assert((ORDER) == __ATOMIC_RELAXED                       \
            || (ORDER) == __ATOMIC_SEQ_CST                    \
            || (ORDER) == __ATOMIC_ACQUIRE                    \
            || (ORDER) == __ATOMIC_CONSUME),                  \
     __atomic_load_n(&(ATOMIC_VAL)->_value, ORDER))

/* Only support GCC (for expression statements) and x86 (for simple
 * atomic semantics) and MSVC x86/x64/ARM */
#elif defined(__GNUC__) && (defined(__i386__) || defined(__amd64))
typedef enum _Py_memory_order {
    _Py_memory_order_relaxed,
    _Py_memory_order_acquire,
    _Py_memory_order_release,
    _Py_memory_order_acq_rel,
    _Py_memory_order_seq_cst
} _Py_memory_order;

typedef struct _Py_atomic_address {
    uintptr_t _value;
} _Py_atomic_address;

typedef struct _Py_atomic_int {
    int _value;
} _Py_atomic_int;


static __inline__ void
_Py_atomic_signal_fence(_Py_memory_order order)
{
    if (order != _Py_memory_order_relaxed)
        __asm__ volatile("":::"memory");
}

static __inline__ void
_Py_atomic_thread_fence(_Py_memory_order order)
{
    if (order != _Py_memory_order_relaxed)
        __asm__ volatile("mfence":::"memory");
}

/* Tell the race checker about this operation's effects. */
static __inline__ void
_Py_ANNOTATE_MEMORY_ORDER(const volatile void *address, _Py_memory_order order)
{
    (void)address;              /* shut up -Wunused-parameter */
    switch(order) {
    case _Py_memory_order_release:
    case _Py_memory_order_acq_rel:
    case _Py_memory_order_seq_cst:
        _Py_ANNOTATE_HAPPENS_BEFORE(address);
        break;
    case _Py_memory_order_relaxed:
    case _Py_memory_order_acquire:
        break;
    }
    switch(order) {
    case _Py_memory_order_acquire:
    case _Py_memory_order_acq_rel:
    case _Py_memory_order_seq_cst:
        _Py_ANNOTATE_HAPPENS_AFTER(address);
        break;
    case _Py_memory_order_relaxed:
    case _Py_memory_order_release:
        break;
    }
}

#define _Py_atomic_store_explicit(ATOMIC_VAL, NEW_VAL, ORDER) \
    __extension__ ({ \
        __typeof__(ATOMIC_VAL) atomic_val = ATOMIC_VAL; \
        __typeof__(atomic_val->_value) new_val = NEW_VAL; \
        volatile __typeof__(new_val) *volatile_data = &atomic_val->_value; \
        _Py_memory_order order = ORDER; \
        _Py_ANNOTATE_MEMORY_ORDER(atomic_val, order); \
        \
        /* Perform the operation. */ \
        _Py_ANNOTATE_IGNORE_WRITES_BEGIN(); \
        switch(order) { \
        case _Py_memory_order_release: \
            _Py_atomic_signal_fence(_Py_memory_order_release); \
            /* fallthrough */ \
        case _Py_memory_order_relaxed: \
            *volatile_data = new_val; \
            break; \
        \
        case _Py_memory_order_acquire: \
        case _Py_memory_order_acq_rel: \
        case _Py_memory_order_seq_cst: \
            __asm__ volatile("xchg %0, %1" \
                         : "+r"(new_val) \
                         : "m"(atomic_val->_value) \
                         : "memory"); \
            break; \
        } \
        _Py_ANNOTATE_IGNORE_WRITES_END(); \
    })

#define _Py_atomic_load_explicit(ATOMIC_VAL, ORDER) \
    __extension__ ({  \
        __typeof__(ATOMIC_VAL) atomic_val = ATOMIC_VAL; \
        __typeof__(atomic_val->_value) result; \
        volatile __typeof__(result) *volatile_data = &atomic_val->_value; \
        _Py_memory_order order = ORDER; \
        _Py_ANNOTATE_MEMORY_ORDER(atomic_val, order); \
        \
        /* Perform the operation. */ \
        _Py_ANNOTATE_IGNORE_READS_BEGIN(); \
        switch(order) { \
        case _Py_memory_order_release: \
        case _Py_memory_order_acq_rel: \
        case _Py_memory_order_seq_cst: \
            /* Loads on x86 are not releases by default, so need a */ \
            /* thread fence. */ \
            _Py_atomic_thread_fence(_Py_memory_order_release); \
            break; \
        default: \
            /* No fence */ \
            break; \
        } \
        result = *volatile_data; \
        switch(order) { \
        case _Py_memory_order_acquire: \
        case _Py_memory_order_acq_rel: \
        case _Py_memory_order_seq_cst: \
            /* Loads on x86 are automatically acquire operations so */ \
            /* can get by with just a compiler fence. */ \
            _Py_atomic_signal_fence(_Py_memory_order_acquire); \
            break; \
        default: \
            /* No fence */ \
            break; \
        } \
        _Py_ANNOTATE_IGNORE_READS_END(); \
        result; \
    })

#elif defined(_MSC_VER)
/*  _Interlocked* functions provide a full memory barrier and are therefore
    enough for acq_rel and seq_cst. If the HLE variants aren't available
    in hardware, they fall back to a full memory barrier as well.

    This might affect performance, but likely only in some very specific and
    hard-to-measure scenarios.
*/
#if defined(_M_IX86) || defined(_M_X64)
typedef enum _Py_memory_order {
    _Py_memory_order_relaxed,
    _Py_memory_order_acquire,
    _Py_memory_order_release,
    _Py_memory_order_acq_rel,
    _Py_memory_order_seq_cst
} _Py_memory_order;

typedef struct _Py_atomic_address {
    volatile uintptr_t _value;
} _Py_atomic_address;

typedef struct _Py_atomic_int {
    volatile int _value;
} _Py_atomic_int;


#if defined(_M_X64)
#define _Py_atomic_store_64bit(ATOMIC_VAL, NEW_VAL, ORDER) \
    switch (ORDER) { \
    case _Py_memory_order_acquire: \
      _InterlockedExchange64_HLEAcquire((__int64 volatile*)ATOMIC_VAL, (__int64)NEW_VAL); \
      break; \
    case _Py_memory_order_release: \
      _InterlockedExchange64_HLERelease((__int64 volatile*)ATOMIC_VAL, (__int64)NEW_VAL); \
      break; \
    default: \
      _InterlockedExchange64((__int64 volatile*)ATOMIC_VAL, (__int64)NEW_VAL); \
      break; \
  }
#else
#define _Py_atomic_store_64bit(ATOMIC_VAL, NEW_VAL, ORDER) ((void)0);
#endif

#define _Py_atomic_store_32bit(ATOMIC_VAL, NEW_VAL, ORDER) \
  switch (ORDER) { \
  case _Py_memory_order_acquire: \
    _InterlockedExchange_HLEAcquire((volatile long*)ATOMIC_VAL, (int)NEW_VAL); \
    break; \
  case _Py_memory_order_release: \
    _InterlockedExchange_HLERelease((volatile long*)ATOMIC_VAL, (int)NEW_VAL); \
    break; \
  default: \
    _InterlockedExchange((volatile long*)ATOMIC_VAL, (int)NEW_VAL); \
    break; \
  }

#if defined(_M_X64)
/*  This has to be an intptr_t for now.
    gil_created() uses -1 as a sentinel value; if this returned
    a uintptr_t, the comparison would be unsigned and crash.
*/
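
/* Illustrative sketch (hypothetical values, not part of the header) of why
 * the signedness matters for the -1 sentinel:
 *
 *     uintptr_t u = (uintptr_t)-1;   // sentinel stored as all-one bits
 *     (void)(u >= 0);                // always true: unsigned compare
 *     (void)((intptr_t)u >= 0);      // false: the -1 sentinel is detected
 *
 * Returning intptr_t keeps comparisons against the sentinel signed.
 */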
inline intptr_t _Py_atomic_load_64bit(volatile uintptr_t* value, int order) {
    __int64 old;
    switch (order) {
    case _Py_memory_order_acquire:
    {
      do {
        old = *value;
      } while(_InterlockedCompareExchange64_HLEAcquire((volatile __int64*)value, old, old) != old);
      break;
    }
    case _Py_memory_order_release:
    {
      do {
        old = *value;
      } while(_InterlockedCompareExchange64_HLERelease((volatile __int64*)value, old, old) != old);
      break;
    }
    case _Py_memory_order_relaxed:
      old = *value;
      break;
    default:
    {
      do {
        old = *value;
      } while(_InterlockedCompareExchange64((volatile __int64*)value, old, old) != old);
      break;
    }
    }
    return old;
}

#else
#define _Py_atomic_load_64bit(ATOMIC_VAL, ORDER) *ATOMIC_VAL
#endif

inline int _Py_atomic_load_32bit(volatile int* value, int order) {
    long old;
    switch (order) {
    case _Py_memory_order_acquire:
    {
      do {
        old = *value;
      } while(_InterlockedCompareExchange_HLEAcquire((volatile long*)value, old, old) != old);
      break;
    }
    case _Py_memory_order_release:
    {
      do {
        old = *value;
      } while(_InterlockedCompareExchange_HLERelease((volatile long*)value, old, old) != old);
      break;
    }
    case _Py_memory_order_relaxed:
      old = *value;
      break;
    default:
    {
      do {
        old = *value;
      } while(_InterlockedCompareExchange((volatile long*)value, old, old) != old);
      break;
    }
    }
    return old;
}

#define _Py_atomic_store_explicit(ATOMIC_VAL, NEW_VAL, ORDER) \
  if (sizeof((ATOMIC_VAL)->_value) == 8) { \
    _Py_atomic_store_64bit((volatile long long*)&(ATOMIC_VAL)->_value, NEW_VAL, ORDER) } else { \
    _Py_atomic_store_32bit((volatile long*)&(ATOMIC_VAL)->_value, NEW_VAL, ORDER) }

#define _Py_atomic_load_explicit(ATOMIC_VAL, ORDER) \
  ( \
    sizeof((ATOMIC_VAL)->_value) == 8 ? \
    _Py_atomic_load_64bit((volatile uintptr_t*)&(ATOMIC_VAL)->_value, ORDER) : \
    _Py_atomic_load_32bit((volatile int*)&(ATOMIC_VAL)->_value, ORDER) \
  )
#elif defined(_M_ARM) || defined(_M_ARM64)
typedef enum _Py_memory_order {
    _Py_memory_order_relaxed,
    _Py_memory_order_acquire,
    _Py_memory_order_release,
    _Py_memory_order_acq_rel,
    _Py_memory_order_seq_cst
} _Py_memory_order;

typedef struct _Py_atomic_address {
    volatile uintptr_t _value;
} _Py_atomic_address;

typedef struct _Py_atomic_int {
    volatile int _value;
} _Py_atomic_int;


#if defined(_M_ARM64)
#define _Py_atomic_store_64bit(ATOMIC_VAL, NEW_VAL, ORDER) \
    switch (ORDER) { \
    case _Py_memory_order_acquire: \
      _InterlockedExchange64_acq((__int64 volatile*)ATOMIC_VAL, (__int64)NEW_VAL); \
      break; \
    case _Py_memory_order_release: \
      _InterlockedExchange64_rel((__int64 volatile*)ATOMIC_VAL, (__int64)NEW_VAL); \
      break; \
    default: \
      _InterlockedExchange64((__int64 volatile*)ATOMIC_VAL, (__int64)NEW_VAL); \
      break; \
  }
#else
#define _Py_atomic_store_64bit(ATOMIC_VAL, NEW_VAL, ORDER) ((void)0);
#endif

#define _Py_atomic_store_32bit(ATOMIC_VAL, NEW_VAL, ORDER) \
  switch (ORDER) { \
  case _Py_memory_order_acquire: \
    _InterlockedExchange_acq((volatile long*)ATOMIC_VAL, (int)NEW_VAL); \
    break; \
  case _Py_memory_order_release: \
    _InterlockedExchange_rel((volatile long*)ATOMIC_VAL, (int)NEW_VAL); \
    break; \
  default: \
    _InterlockedExchange((volatile long*)ATOMIC_VAL, (int)NEW_VAL); \
    break; \
  }

#if defined(_M_ARM64)
/*  This has to be an intptr_t for now.
    gil_created() uses -1 as a sentinel value; if this returned
    a uintptr_t, the comparison would be unsigned and crash.
*/
inline intptr_t _Py_atomic_load_64bit(volatile uintptr_t* value, int order) {
    uintptr_t old;
    switch (order) {
    case _Py_memory_order_acquire:
    {
      do {
        old = *value;
      } while(_InterlockedCompareExchange64_acq(value, old, old) != old);
      break;
    }
    case _Py_memory_order_release:
    {
      do {
        old = *value;
      } while(_InterlockedCompareExchange64_rel(value, old, old) != old);
      break;
    }
    case _Py_memory_order_relaxed:
      old = *value;
      break;
    default:
    {
      do {
        old = *value;
      } while(_InterlockedCompareExchange64(value, old, old) != old);
      break;
    }
    }
    return old;
}

#else
#define _Py_atomic_load_64bit(ATOMIC_VAL, ORDER) *ATOMIC_VAL
#endif

inline int _Py_atomic_load_32bit(volatile int* value, int order) {
    int old;
    switch (order) {
    case _Py_memory_order_acquire:
    {
      do {
        old = *value;
      } while(_InterlockedCompareExchange_acq(value, old, old) != old);
      break;
    }
    case _Py_memory_order_release:
    {
      do {
        old = *value;
      } while(_InterlockedCompareExchange_rel(value, old, old) != old);
      break;
    }
    case _Py_memory_order_relaxed:
      old = *value;
      break;
    default:
    {
      do {
        old = *value;
      } while(_InterlockedCompareExchange(value, old, old) != old);
      break;
    }
    }
    return old;
}

#define _Py_atomic_store_explicit(ATOMIC_VAL, NEW_VAL, ORDER) \
  if (sizeof((ATOMIC_VAL)->_value) == 8) { \
    _Py_atomic_store_64bit(&(ATOMIC_VAL)->_value, NEW_VAL, ORDER) } else { \
    _Py_atomic_store_32bit(&(ATOMIC_VAL)->_value, NEW_VAL, ORDER) }

#define _Py_atomic_load_explicit(ATOMIC_VAL, ORDER) \
  ( \
    sizeof((ATOMIC_VAL)->_value) == 8 ? \
    _Py_atomic_load_64bit((volatile uintptr_t*)&(ATOMIC_VAL)->_value, ORDER) : \
    _Py_atomic_load_32bit((volatile int*)&(ATOMIC_VAL)->_value, ORDER) \
  )
#endif
#else  /* no C11 atomics, no GCC builtins, not GCC on x86, not MSVC */
typedef enum _Py_memory_order {
    _Py_memory_order_relaxed,
    _Py_memory_order_acquire,
    _Py_memory_order_release,
    _Py_memory_order_acq_rel,
    _Py_memory_order_seq_cst
} _Py_memory_order;

typedef struct _Py_atomic_address {
    uintptr_t _value;
} _Py_atomic_address;

typedef struct _Py_atomic_int {
    int _value;
} _Py_atomic_int;
/* Fall back for other compilers and processors by assuming that simple
   volatile accesses are atomic.  This is false, so people should port
   this. */
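/* For example, on a 32-bit target a volatile 64-bit store may be compiled as
   two separate 32-bit stores, so a concurrent reader can observe a torn,
   half-updated value; plain volatile also orders nothing with respect to
   other memory accesses.  This fallback is only a stopgap. */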
#define _Py_atomic_signal_fence(/*memory_order*/ ORDER) ((void)0)
#define _Py_atomic_thread_fence(/*memory_order*/ ORDER) ((void)0)
#define _Py_atomic_store_explicit(ATOMIC_VAL, NEW_VAL, ORDER) \
    ((ATOMIC_VAL)->_value = NEW_VAL)
#define _Py_atomic_load_explicit(ATOMIC_VAL, ORDER) \
    ((ATOMIC_VAL)->_value)
#endif

/* Standardized shortcuts. */
#define _Py_atomic_store(ATOMIC_VAL, NEW_VAL) \
    _Py_atomic_store_explicit(ATOMIC_VAL, NEW_VAL, _Py_memory_order_seq_cst)
#define _Py_atomic_load(ATOMIC_VAL) \
    _Py_atomic_load_explicit(ATOMIC_VAL, _Py_memory_order_seq_cst)

/* Python-local extensions */

#define _Py_atomic_store_relaxed(ATOMIC_VAL, NEW_VAL) \
    _Py_atomic_store_explicit(ATOMIC_VAL, NEW_VAL, _Py_memory_order_relaxed)
#define _Py_atomic_load_relaxed(ATOMIC_VAL) \
    _Py_atomic_load_explicit(ATOMIC_VAL, _Py_memory_order_relaxed)
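
/* Illustrative sketch (not part of the header): the shortcuts above are the
 * forms most core code uses.  The field name `pending_flag` is hypothetical.
 *
 *     static _Py_atomic_int pending_flag;
 *
 *     // Writer: sequentially consistent store via the standardized shortcut.
 *     _Py_atomic_store(&pending_flag, 1);
 *
 *     // Hot-path reader: relaxed load, cheap enough to run every iteration.
 *     if (_Py_atomic_load_relaxed(&pending_flag)) {
 *         // ... handle the pending request ...
 *     }
 */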
#endif  /* Py_BUILD_CORE */
#endif  /* Py_ATOMIC_H */