#ifndef JEMALLOC_INTERNAL_ATOMIC_GCC_SYNC_H
#define JEMALLOC_INTERNAL_ATOMIC_GCC_SYNC_H

#define ATOMIC_INIT(...) {__VA_ARGS__}

typedef enum {
	atomic_memory_order_relaxed,
	atomic_memory_order_acquire,
	atomic_memory_order_release,
	atomic_memory_order_acq_rel,
	atomic_memory_order_seq_cst
} atomic_memory_order_t;

ATOMIC_INLINE void
atomic_fence(atomic_memory_order_t mo) {
	/* Easy cases first: no barrier, and full barrier. */
	if (mo == atomic_memory_order_relaxed) {
		asm volatile("" ::: "memory");
		return;
	}
	if (mo == atomic_memory_order_seq_cst) {
		asm volatile("" ::: "memory");
		__sync_synchronize();
		asm volatile("" ::: "memory");
		return;
	}
	asm volatile("" ::: "memory");
#  if defined(__i386__) || defined(__x86_64__)
	/* This is implicit on x86. */
#  elif defined(__ppc__)
	asm volatile("lwsync");
#  elif defined(__sparc__) && defined(__arch64__)
	if (mo == atomic_memory_order_acquire) {
		asm volatile("membar #LoadLoad | #LoadStore");
	} else if (mo == atomic_memory_order_release) {
		asm volatile("membar #LoadStore | #StoreStore");
	} else {
		asm volatile("membar #LoadLoad | #LoadStore | #StoreStore");
	}
#  else
	__sync_synchronize();
#  endif
	asm volatile("" ::: "memory");
}
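
/*
 * For illustration (a sketch of the cases above, not normative): an acquire,
 * release, or acq_rel fence emits only compiler barriers on x86, an lwsync
 * between compiler barriers on ppc, the corresponding membar on 64-bit SPARC,
 * and a full __sync_synchronize() on any other architecture.
 */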

/*
 * A correct implementation of seq_cst loads and stores on weakly ordered
 * architectures could do either of the following:
 *   1. store() is weak-fence -> store -> strong fence, load() is load ->
 *      strong-fence.
 *   2. store() is strong-fence -> store, load() is strong-fence -> load ->
 *      weak-fence.
 * The tricky thing is, load() and store() above can be the load or store
 * portions of a gcc __sync builtin, so we have to follow GCC's lead, which
 * means going with strategy 2.
 * On strongly ordered architectures, the natural strategy is to stick a strong
 * fence after seq_cst stores, and have naked loads.  So we want the strong
 * fences in different places on different architectures.
 * atomic_pre_sc_load_fence and atomic_post_sc_store_fence allow us to
 * accomplish this.
 */

ATOMIC_INLINE void
atomic_pre_sc_load_fence(void) {
#  if defined(__i386__) || defined(__x86_64__) ||			\
    (defined(__sparc__) && defined(__arch64__))
	atomic_fence(atomic_memory_order_relaxed);
#  else
	atomic_fence(atomic_memory_order_seq_cst);
#  endif
}

ATOMIC_INLINE void
atomic_post_sc_store_fence(void) {
#  if defined(__i386__) || defined(__x86_64__) ||			\
    (defined(__sparc__) && defined(__arch64__))
	atomic_fence(atomic_memory_order_seq_cst);
#  else
	atomic_fence(atomic_memory_order_relaxed);
#  endif
}
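
/*
 * Putting the pieces together (an illustrative summary, not additional
 * behavior): with seq_cst ordering, the strong __sync_synchronize() ends up
 * after the store on x86 and 64-bit SPARC (via atomic_post_sc_store_fence)
 * and before the load on every other architecture (via
 * atomic_pre_sc_load_fence); the remaining acquire/release fences reduce to
 * compiler barriers or weaker hardware barriers inside atomic_fence().
 */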

#define JEMALLOC_GENERATE_ATOMICS(type, short_type,			\
    /* unused */ lg_size)						\
typedef struct {							\
	type volatile repr;						\
} atomic_##short_type##_t;						\
									\
ATOMIC_INLINE type							\
atomic_load_##short_type(const atomic_##short_type##_t *a,		\
    atomic_memory_order_t mo) {						\
	if (mo == atomic_memory_order_seq_cst) {			\
		atomic_pre_sc_load_fence();				\
	}								\
	type result = a->repr;						\
	if (mo != atomic_memory_order_relaxed) {			\
		atomic_fence(atomic_memory_order_acquire);		\
	}								\
	return result;							\
}									\
									\
ATOMIC_INLINE void							\
atomic_store_##short_type(atomic_##short_type##_t *a,			\
    type val, atomic_memory_order_t mo) {				\
	if (mo != atomic_memory_order_relaxed) {			\
		atomic_fence(atomic_memory_order_release);		\
	}								\
	a->repr = val;							\
	if (mo == atomic_memory_order_seq_cst) {			\
		atomic_post_sc_store_fence();				\
	}								\
}									\
									\
ATOMIC_INLINE type							\
atomic_exchange_##short_type(atomic_##short_type##_t *a, type val,	\
    atomic_memory_order_t mo) {						\
	/*								\
	 * Because of FreeBSD, we care about gcc 4.2, which doesn't have\
	 * an atomic exchange builtin.  We fake it with a CAS loop.	\
	 */								\
	while (true) {							\
		type old = a->repr;					\
		if (__sync_bool_compare_and_swap(&a->repr, old, val)) {	\
			return old;					\
		}							\
	}								\
}									\
									\
ATOMIC_INLINE bool							\
atomic_compare_exchange_weak_##short_type(atomic_##short_type##_t *a,	\
    type *expected, type desired, atomic_memory_order_t success_mo,	\
    atomic_memory_order_t failure_mo) {					\
	type prev = __sync_val_compare_and_swap(&a->repr, *expected,	\
	    desired);							\
	if (prev == *expected) {					\
		return true;						\
	} else {							\
		*expected = prev;					\
		return false;						\
	}								\
}									\
									\
ATOMIC_INLINE bool							\
atomic_compare_exchange_strong_##short_type(atomic_##short_type##_t *a,	\
    type *expected, type desired, atomic_memory_order_t success_mo,	\
    atomic_memory_order_t failure_mo) {					\
	type prev = __sync_val_compare_and_swap(&a->repr, *expected,	\
	    desired);							\
	if (prev == *expected) {					\
		return true;						\
	} else {							\
		*expected = prev;					\
		return false;						\
	}								\
}
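
/*
 * Usage sketch (illustrative only; instantiations live outside this file).
 * Assuming something like
 *
 *	JEMALLOC_GENERATE_ATOMICS(void *, p, 3)
 *
 * the macro emits atomic_p_t together with atomic_load_p, atomic_store_p,
 * atomic_exchange_p, and both compare-exchange variants:
 *
 *	atomic_p_t slot = ATOMIC_INIT(NULL);
 *	void *expected = NULL;
 *	atomic_store_p(&slot, new_val, atomic_memory_order_release);
 *	void *cur = atomic_load_p(&slot, atomic_memory_order_acquire);
 *	atomic_compare_exchange_strong_p(&slot, &expected, new_val,
 *	    atomic_memory_order_acq_rel, atomic_memory_order_relaxed);
 *
 * The short_type "p", the variable names, and the lg_size argument (unused by
 * this backend) are hypothetical placeholders.
 */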

#define JEMALLOC_GENERATE_INT_ATOMICS(type, short_type,			\
    /* unused */ lg_size)						\
JEMALLOC_GENERATE_ATOMICS(type, short_type, /* unused */ lg_size)	\
									\
ATOMIC_INLINE type							\
atomic_fetch_add_##short_type(atomic_##short_type##_t *a, type val,	\
    atomic_memory_order_t mo) {						\
	return __sync_fetch_and_add(&a->repr, val);			\
}									\
									\
ATOMIC_INLINE type							\
atomic_fetch_sub_##short_type(atomic_##short_type##_t *a, type val,	\
    atomic_memory_order_t mo) {						\
	return __sync_fetch_and_sub(&a->repr, val);			\
}									\
									\
ATOMIC_INLINE type							\
atomic_fetch_and_##short_type(atomic_##short_type##_t *a, type val,	\
    atomic_memory_order_t mo) {						\
	return __sync_fetch_and_and(&a->repr, val);			\
}									\
									\
ATOMIC_INLINE type							\
atomic_fetch_or_##short_type(atomic_##short_type##_t *a, type val,	\
    atomic_memory_order_t mo) {						\
	return __sync_fetch_and_or(&a->repr, val);			\
}									\
									\
ATOMIC_INLINE type							\
atomic_fetch_xor_##short_type(atomic_##short_type##_t *a, type val,	\
    atomic_memory_order_t mo) {						\
	return __sync_fetch_and_xor(&a->repr, val);			\
}
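
/*
 * Usage sketch (illustrative only; instantiations live outside this file).
 * Assuming something like
 *
 *	JEMALLOC_GENERATE_INT_ATOMICS(uint32_t, u32, 2)
 *
 * a counter can be updated and read as:
 *
 *	atomic_u32_t nrequests = ATOMIC_INIT(0);
 *	atomic_fetch_add_u32(&nrequests, 1, atomic_memory_order_relaxed);
 *	uint32_t n = atomic_load_u32(&nrequests, atomic_memory_order_acquire);
 *
 * The short_type "u32" and the variable names are hypothetical placeholders.
 * Note that the fetch-and-op functions above ignore their mo argument; the
 * underlying __sync builtins always act as full barriers.
 */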

#endif /* JEMALLOC_INTERNAL_ATOMIC_GCC_SYNC_H */