/*
 * Copyright (c) 2016 Cyril Hrubis <chrubis@suse.cz>
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program. If not, see <http://www.gnu.org/licenses/>.
 */
/* The LTP library has some of its own atomic synchronisation primitives
 * contained in this file. Generally speaking these should not be used
 * directly in tests for synchronisation; instead use tst_checkpoint.h,
 * tst_fuzzy_sync.h or the POSIX library.
 *
 * Notes on compile and runtime memory barriers and atomics.
 *
 * Within the LTP library we have three concerns when accessing variables
 * shared by multiple threads or processes:
 *
 * (1) Removal or reordering of accesses by the compiler.
 * (2) Atomicity of addition.
 * (3) LOAD-STORE ordering between threads.
 *
 * The first (1) is the most likely to cause an error if not properly
 * handled. We avoid it by using variables and statements which will not be
 * removed or reordered by the compiler during optimisation. This includes
 * the __atomic and __sync intrinsics, volatile asm statements marked with
 * "memory", and variables marked with volatile.
 *
 * On any platform Linux is likely to run on, a LOAD (fetch) or STORE of a
 * 32-bit integer will be atomic. However, fetching and adding to a variable
 * quite likely is not, so for (2) we need to ensure we use atomic addition.
 *
 * Finally, for tst_fuzzy_sync at least, we need to ensure that LOADs and
 * STOREs of any shared variables (including non-atomics) that are made
 * between calls to tst_fzsync_wait are completed (globally visible) before
 * tst_fzsync_wait completes. For this, runtime memory and instruction
 * barriers are required in addition to compile time barriers.
 *
 * We use full sequential ordering (__ATOMIC_SEQ_CST) for the sake of
 * simplicity. LTP tests tend to be syscall heavy, so any gain from using a
 * weaker memory model is unlikely to result in a noticeable performance
 * improvement, while at the same time being a potent source of confusion.
 *
 * Likewise, for the fallback ASM, the simplest "definitely will work,
 * always" approach is preferred over anything more performant.
 *
 * Also see Documentation/memory-barriers.txt in the kernel tree and
 * https://gcc.gnu.org/onlinedocs/gcc/_005f_005fatomic-Builtins.html
 * (note that terminology may vary between sources).
 */
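/* A minimal usage sketch (illustrative only; the THREADS constant and the
 * worker/waiter split are assumptions about a typical test, not part of this
 * header): each worker increments a shared counter with tst_atomic_inc()
 * while a waiting thread polls it with tst_atomic_load().
 *
 *	static int counter;
 *
 *	In each worker thread (or child process on shared memory):
 *		tst_atomic_inc(&counter);
 *
 *	In the waiting thread:
 *		while (tst_atomic_load(&counter) < THREADS)
 *			usleep(100);
 *
 * A plain counter++ would be a non-atomic read-modify-write (concern (2)
 * above), so concurrent increments could be lost.
 */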

#ifndef TST_ATOMIC_H__
#define TST_ATOMIC_H__

#include "config.h"

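/* HAVE_ATOMIC_MEMORY_MODEL and HAVE_SYNC_ADD_AND_FETCH are expected to be
 * defined (or left undefined) in config.h by the build system's compiler
 * checks, so which of the branches below is compiled depends on the
 * toolchain rather than on anything in this header.
 */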
#if HAVE_ATOMIC_MEMORY_MODEL == 1
static inline int tst_atomic_add_return(int i, int *v)
{
	return __atomic_add_fetch(v, i, __ATOMIC_SEQ_CST);
}

static inline int tst_atomic_load(int *v)
{
	return __atomic_load_n(v, __ATOMIC_SEQ_CST);
}

static inline void tst_atomic_store(int i, int *v)
{
	__atomic_store_n(v, i, __ATOMIC_SEQ_CST);
}

#elif HAVE_SYNC_ADD_AND_FETCH == 1
static inline int tst_atomic_add_return(int i, int *v)
{
	return __sync_add_and_fetch(v, i);
}

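/* The __sync interface provides no load or store builtins, so a plain
 * access is sandwiched between two __sync_synchronize() calls below; each
 * acts as a full compiler and CPU barrier, which is a simple (if
 * conservative) way to approximate the sequentially consistent accesses
 * used in the __atomic branch above.
 */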
static inline int tst_atomic_load(int *v)
{
	int ret;

	__sync_synchronize();
	ret = *v;
	__sync_synchronize();
	return ret;
}

static inline void tst_atomic_store(int i, int *v)
{
	__sync_synchronize();
	*v = i;
	__sync_synchronize();
}

#elif defined(__i386__) || defined(__x86_64__)
# define LTP_USE_GENERIC_LOAD_STORE_ASM 1

static inline int tst_atomic_add_return(int i, int *v)
{
	int __ret = i;

	/*
	 * taken from arch/x86/include/asm/cmpxchg.h
	 */
	asm volatile ("lock; xaddl %0, %1\n"
		: "+r" (__ret), "+m" (*v) : : "memory", "cc");

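	/* xaddl exchanges and adds: the old value of *v is left in __ret,
	 * so the value after the addition is i + __ret.
	 */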
	return i + __ret;
}

#elif defined(__powerpc__) || defined(__powerpc64__)
static inline int tst_atomic_add_return(int i, int *v)
{
	int t;

	/* taken from arch/powerpc/include/asm/atomic.h */
	asm volatile(
		"	sync\n"
		"1:	lwarx	%0,0,%2		# atomic_add_return\n"
		"	add %0,%1,%0\n"
		"	stwcx.	%0,0,%2 \n"
		"	bne-	1b\n"
		"	sync\n"
		: "=&r" (t)
		: "r" (i), "r" (v)
		: "cc", "memory");

	return t;
}

static inline int tst_atomic_load(int *v)
{
	int ret;

	asm volatile("sync\n" : : : "memory");
	ret = *v;
	asm volatile("sync\n" : : : "memory");

	return ret;
}

static inline void tst_atomic_store(int i, int *v)
{
	asm volatile("sync\n" : : : "memory");
	*v = i;
	asm volatile("sync\n" : : : "memory");
}

#elif defined(__s390__) || defined(__s390x__)
# define LTP_USE_GENERIC_LOAD_STORE_ASM 1

static inline int tst_atomic_add_return(int i, int *v)
{
	int old_val, new_val;

	/* taken from arch/s390/include/asm/atomic.h */
	asm volatile(
		"	l	%0,%2\n"
		"0:	lr	%1,%0\n"
		"	ar	%1,%3\n"
		"	cs	%0,%1,%2\n"
		"	jl	0b"
		: "=&d" (old_val), "=&d" (new_val), "+Q" (*v)
		: "d" (i)
		: "cc", "memory");

	return old_val + i;
}

#elif defined(__arc__)

/* ARCv2 defines the smp barriers */
#ifdef __ARC700__
#define smp_mb()	asm volatile("" : : : "memory")
#else
#define smp_mb()	asm volatile("dmb 3\n" : : : "memory")
#endif

static inline int tst_atomic_add_return(int i, int *v)
{
	unsigned int val;

	smp_mb();

	asm volatile(
		"1:	llock   %[val], [%[ctr]]	\n"
		"	add     %[val], %[val], %[i]	\n"
		"	scond   %[val], [%[ctr]]	\n"
		"	bnz     1b			\n"
		: [val]	"=&r"	(val)
		: [ctr]	"r"	(v),
		  [i]	"ir"	(i)
		: "cc", "memory");

	smp_mb();

	return val;
}

static inline int tst_atomic_load(int *v)
{
	int ret;

	smp_mb();
	ret = *v;
	smp_mb();

	return ret;
}

static inline void tst_atomic_store(int i, int *v)
{
	smp_mb();
	*v = i;
	smp_mb();
}

#elif defined (__aarch64__)
static inline int tst_atomic_add_return(int i, int *v)
{
	unsigned long tmp;
	int result;

	__asm__ __volatile__(
"       prfm    pstl1strm, %2	\n"
"1:     ldaxr	%w0, %2		\n"
"       add	%w0, %w0, %w3	\n"
"       stlxr	%w1, %w0, %2	\n"
"       cbnz	%w1, 1b		\n"
"       dmb ish			\n"
	: "=&r" (result), "=&r" (tmp), "+Q" (*v)
	: "Ir" (i)
	: "memory");

	return result;
}

/* We are using load and store exclusive (ldaxr & stlxr) instructions to try
 * to help prevent the tst_atomic_load and, more likely, tst_atomic_store
 * functions from interfering with tst_atomic_add_return, which takes
 * advantage of exclusivity. It is not clear whether this is a good idea,
 * but it does mean that all three functions are very similar.
 */
static inline int tst_atomic_load(int *v)
{
	int ret;
	unsigned long tmp;

	asm volatile("//atomic_load			\n"
		"	prfm	pstl1strm,  %[v]	\n"
		"1:	ldaxr	%w[ret], %[v]		\n"
		"	stlxr   %w[tmp], %w[ret], %[v]  \n"
		"	cbnz    %w[tmp], 1b		\n"
		"	dmb ish				\n"
		: [tmp] "=&r" (tmp), [ret] "=&r" (ret), [v] "+Q" (*v)
		: : "memory");

	return ret;
}

static inline void tst_atomic_store(int i, int *v)
{
	unsigned long tmp;

	asm volatile("//atomic_store			\n"
		"	prfm	pstl1strm, %[v]		\n"
		"1:	ldaxr	%w[tmp], %[v]		\n"
		"	stlxr   %w[tmp], %w[i], %[v]	\n"
		"	cbnz    %w[tmp], 1b		\n"
		"	dmb ish				\n"
		: [tmp] "=&r" (tmp), [v] "+Q" (*v)
		: [i] "r" (i)
		: "memory");
}

#elif defined(__sparc__) && defined(__arch64__)
# define LTP_USE_GENERIC_LOAD_STORE_ASM 1
static inline int tst_atomic_add_return(int i, int *v)
{
	int ret, tmp;

	/* Based on arch/sparc/lib/atomic_64.S with the exponential backoff
	 * function removed because we are unlikely to have a large (>= 16?)
	 * number of cores continuously trying to update one variable.
	 */
	asm volatile("/*atomic_add_return*/		\n"
		"1:	ldsw	[%[v]], %[ret];		\n"
		"	add	%[ret], %[i], %[tmp];	\n"
		"	cas	[%[v]], %[ret], %[tmp];	\n"
		"	cmp	%[ret], %[tmp];		\n"
		"	bne,pn	%%icc, 1b;		\n"
		"	nop;				\n"
		"	add	%[ret], %[i], %[ret];	\n"
		: [ret] "=r&" (ret), [tmp] "=r&" (tmp)
		: [i] "r" (i), [v] "r" (v)
		: "memory", "cc");

	return ret;
}

#else /* HAVE_SYNC_ADD_AND_FETCH == 1 */
# error Your compiler does not provide __atomic_add_fetch, __sync_add_and_fetch \
        and an LTP implementation is missing for your architecture.
#endif

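/* The branches above that define LTP_USE_GENERIC_LOAD_STORE_ASM (x86, s390
 * and sparc64) have comparatively strong hardware memory ordering, so the
 * generic load and store below rely on compiler barriers alone; this is an
 * assumption inherited from those branches rather than something checked
 * here.
 */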
#ifdef LTP_USE_GENERIC_LOAD_STORE_ASM
static inline int tst_atomic_load(int *v)
{
	int ret;

	asm volatile("" : : : "memory");
	ret = *v;
	asm volatile("" : : : "memory");

	return ret;
}

static inline void tst_atomic_store(int i, int *v)
{
	asm volatile("" : : : "memory");
	*v = i;
	asm volatile("" : : : "memory");
}
#endif

static inline int tst_atomic_inc(int *v)
{
	return tst_atomic_add_return(1, v);
}

static inline int tst_atomic_dec(int *v)
{
	return tst_atomic_add_return(-1, v);
}
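/* A further illustrative sketch (an assumption about typical usage, not an
 * additional guarantee made by this header): since all of the primitives
 * above aim for sequentially consistent behaviour, a plain write made before
 * tst_atomic_store() of a flag should be visible to a thread that has seen
 * the flag change via tst_atomic_load(). This is the LOAD-STORE ordering
 * property (concern (3) above) that tst_fuzzy_sync relies on; in tests,
 * prefer tst_checkpoint.h or tst_fuzzy_sync.h as noted at the top of this
 * file.
 *
 *	static int flag, data;
 *
 *	In the writer:
 *		data = 42;
 *		tst_atomic_store(1, &flag);
 *
 *	In the reader:
 *		while (!tst_atomic_load(&flag))
 *			;
 *		At this point data reads as 42.
 */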

#endif	/* TST_ATOMIC_H__ */