/*
 * Copyright (c) 2016 Cyril Hrubis <chrubis (at) suse.cz>
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program. If not, see <http://www.gnu.org/licenses/>.
 */

/* The LTP library has some of its own atomic synchronisation primitives
 * contained in this file. Generally speaking these should not be used
 * directly in tests for synchronisation; instead use tst_checkpoint.h,
 * tst_fuzzy_sync.h or the POSIX library.
 *
 * Notes on compile and runtime memory barriers and atomics.
 *
 * Within the LTP library we have three concerns when accessing variables
 * shared by multiple threads or processes:
 *
 * (1) Removal or reordering of accesses by the compiler.
 * (2) Atomicity of addition.
 * (3) LOAD-STORE ordering between threads.
 *
 * The first (1) is the most likely to cause an error if not properly
 * handled. We avoid it by using statements and variables which the compiler
 * will not remove or reorder during optimisation: the __atomic and __sync
 * intrinsics, volatile asm statements with a "memory" clobber, and variables
 * marked volatile.
 *
 * On any platform Linux is likely to run on, a LOAD (fetch) or STORE of a
 * naturally aligned 32-bit integer will be atomic. However, fetching and
 * adding to a variable quite likely will not be, so for (2) we need to
 * ensure we use atomic addition (see the sketch below this comment).
 *
 * Finally, for tst_fuzzy_sync at least, we need to ensure that LOADs and
 * STOREs of any shared variables (including non-atomics) made between calls
 * to tst_fzsync_wait are completed (globally visible) before tst_fzsync_wait
 * completes. For this, runtime memory and instruction barriers are required
 * in addition to compile-time ones.
 *
 * We use full sequential ordering (__ATOMIC_SEQ_CST) for the sake of
 * simplicity. LTP tests tend to be syscall heavy, so a weaker memory model
 * is unlikely to yield a noticeable performance improvement while being a
 * potent source of confusion.
 *
 * Likewise, for the fallback ASM, the simplest "definitely will work, always"
 * approach is preferred over anything more performant.
 *
 * Also see Documentation/memory-barriers.txt in the kernel tree and
 * https://gcc.gnu.org/onlinedocs/gcc/_005f_005fatomic-Builtins.html;
 * note that terminology may vary between sources.
 */
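
/* For example (an illustrative sketch only, not part of the LTP API; the
 * worker() function and both counters are hypothetical): a plain increment
 * compiles to separate LOAD, ADD and STORE steps, so two threads may read
 * the same old value and one update is lost. tst_atomic_inc() below performs
 * the whole read-modify-write as a single atomic operation:
 *
 *	static int racy_counter;
 *	static int atomic_counter;
 *
 *	static void *worker(void *unused)
 *	{
 *		racy_counter++;				// updates may be lost
 *		tst_atomic_inc(&atomic_counter);	// updates are never lost
 *		return NULL;
 *	}
 *
 * After N threads have each run worker() once, atomic_counter is exactly N,
 * while racy_counter may hold any value from 1 to N.
 */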

#ifndef TST_ATOMIC_H__
#define TST_ATOMIC_H__

#include "config.h"

#if HAVE_ATOMIC_MEMORY_MODEL == 1
static inline int tst_atomic_add_return(int i, int *v)
{
	return __atomic_add_fetch(v, i, __ATOMIC_SEQ_CST);
}

static inline int tst_atomic_load(int *v)
{
	return __atomic_load_n(v, __ATOMIC_SEQ_CST);
}

static inline void tst_atomic_store(int i, int *v)
{
	__atomic_store_n(v, i, __ATOMIC_SEQ_CST);
}

#elif HAVE_SYNC_ADD_AND_FETCH == 1
static inline int tst_atomic_add_return(int i, int *v)
{
	return __sync_add_and_fetch(v, i);
}

static inline int tst_atomic_load(int *v)
{
	int ret;

	__sync_synchronize();
	ret = *v;
	__sync_synchronize();
	return ret;
}

static inline void tst_atomic_store(int i, int *v)
{
	__sync_synchronize();
	*v = i;
	__sync_synchronize();
}

#elif defined(__i386__) || defined(__x86_64__)
# define LTP_USE_GENERIC_LOAD_STORE_ASM 1

static inline int tst_atomic_add_return(int i, int *v)
{
	int __ret = i;

	/*
	 * taken from arch/x86/include/asm/cmpxchg.h
	 */
	asm volatile ("lock; xaddl %0, %1\n"
		      : "+r" (__ret), "+m" (*v) : : "memory", "cc");

	return i + __ret;
}

#elif defined(__powerpc__) || defined(__powerpc64__)
static inline int tst_atomic_add_return(int i, int *v)
{
	int t;

	/* taken from arch/powerpc/include/asm/atomic.h */
	asm volatile(
		"	sync\n"
		"1:	lwarx	%0,0,%2		# atomic_add_return\n"
		"	add	%0,%1,%0\n"
		"	stwcx.	%0,0,%2\n"
		"	bne-	1b\n"
		"	sync\n"
		: "=&r" (t)
		: "r" (i), "r" (v)
		: "cc", "memory");

	return t;
}

static inline int tst_atomic_load(int *v)
{
	int ret;

	asm volatile("sync\n" : : : "memory");
	ret = *v;
	asm volatile("sync\n" : : : "memory");

	return ret;
}

static inline void tst_atomic_store(int i, int *v)
{
	asm volatile("sync\n" : : : "memory");
	*v = i;
	asm volatile("sync\n" : : : "memory");
}

#elif defined(__s390__) || defined(__s390x__)
# define LTP_USE_GENERIC_LOAD_STORE_ASM 1

static inline int tst_atomic_add_return(int i, int *v)
{
	int old_val, new_val;

	/* taken from arch/s390/include/asm/atomic.h */
	asm volatile(
		"	l	%0,%2\n"
		"0:	lr	%1,%0\n"
		"	ar	%1,%3\n"
		"	cs	%0,%1,%2\n"
		"	jl	0b"
		: "=&d" (old_val), "=&d" (new_val), "+Q" (*v)
		: "d" (i)
		: "cc", "memory");

	return old_val + i;
}

#elif defined(__arc__)

/* ARCv2 defines the smp barriers */
#ifdef __ARC700__
#define smp_mb()	asm volatile("" : : : "memory")
#else
#define smp_mb()	asm volatile("dmb 3\n" : : : "memory")
#endif

static inline int tst_atomic_add_return(int i, int *v)
{
	unsigned int val;

	smp_mb();

	asm volatile(
		"1:	llock	%[val], [%[ctr]]	\n"
		"	add	%[val], %[val], %[i]	\n"
		"	scond	%[val], [%[ctr]]	\n"
		"	bnz	1b			\n"
		: [val] "=&r" (val)
		: [ctr] "r" (v),
		  [i] "ir" (i)
		: "cc", "memory");

	smp_mb();

	return val;
}

static inline int tst_atomic_load(int *v)
{
	int ret;

	smp_mb();
	ret = *v;
	smp_mb();

	return ret;
}

static inline void tst_atomic_store(int i, int *v)
{
	smp_mb();
	*v = i;
	smp_mb();
}

#elif defined(__aarch64__)
static inline int tst_atomic_add_return(int i, int *v)
{
	unsigned long tmp;
	int result;

	__asm__ __volatile__(
		"	prfm	pstl1strm, %2		\n"
		"1:	ldaxr	%w0, %2			\n"
		"	add	%w0, %w0, %w3		\n"
		"	stlxr	%w1, %w0, %2		\n"
		"	cbnz	%w1, 1b			\n"
		"	dmb ish				\n"
		: "=&r" (result), "=&r" (tmp), "+Q" (*v)
		: "Ir" (i)
		: "memory");

	return result;
}

/* We are using load and store exclusive (ldaxr & stlxr) instructions to try
 * and help prevent the tst_atomic_load and, more likely, tst_atomic_store
 * functions from interfering with tst_atomic_add_return, which takes
 * advantage of exclusivity. It is not clear whether this is a good idea, but
 * it does mean that all three functions are very similar.
 */
static inline int tst_atomic_load(int *v)
{
	int ret;
	unsigned long tmp;

	asm volatile("//atomic_load			\n"
		"	prfm	pstl1strm, %[v]		\n"
		"1:	ldaxr	%w[ret], %[v]		\n"
		"	stlxr	%w[tmp], %w[ret], %[v]	\n"
		"	cbnz	%w[tmp], 1b		\n"
		"	dmb ish				\n"
		: [tmp] "=&r" (tmp), [ret] "=&r" (ret), [v] "+Q" (*v)
		: : "memory");

	return ret;
}

static inline void tst_atomic_store(int i, int *v)
{
	unsigned long tmp;

	asm volatile("//atomic_store			\n"
		"	prfm	pstl1strm, %[v]		\n"
		"1:	ldaxr	%w[tmp], %[v]		\n"
		"	stlxr	%w[tmp], %w[i], %[v]	\n"
		"	cbnz	%w[tmp], 1b		\n"
		"	dmb ish				\n"
		: [tmp] "=&r" (tmp), [v] "+Q" (*v)
		: [i] "r" (i)
		: "memory");
}

#elif defined(__sparc__) && defined(__arch64__)
# define LTP_USE_GENERIC_LOAD_STORE_ASM 1
static inline int tst_atomic_add_return(int i, int *v)
{
	int ret, tmp;

	/* Based on arch/sparc/lib/atomic_64.S with the exponential backoff
	 * function removed because we are unlikely to have a large (>= 16?)
	 * number of cores continuously trying to update one variable.
	 */
	asm volatile("/*atomic_add_return*/		\n"
		"1:	ldsw	[%[v]], %[ret];		\n"
		"	add	%[ret], %[i], %[tmp];	\n"
		"	cas	[%[v]], %[ret], %[tmp];	\n"
		"	cmp	%[ret], %[tmp];		\n"
		"	bne,pn	%%icc, 1b;		\n"
		"	 nop;				\n"
		"	add	%[ret], %[i], %[ret];	\n"
		: [ret] "=r&" (ret), [tmp] "=r&" (tmp)
		: [i] "r" (i), [v] "r" (v)
		: "memory", "cc");

	return ret;
}

#else /* HAVE_SYNC_ADD_AND_FETCH == 1 */
# error Your compiler does not provide __atomic_add_fetch, __sync_add_and_fetch \
	and an LTP implementation is missing for your architecture.
#endif

#ifdef LTP_USE_GENERIC_LOAD_STORE_ASM
static inline int tst_atomic_load(int *v)
{
	int ret;

	asm volatile("" : : : "memory");
	ret = *v;
	asm volatile("" : : : "memory");

	return ret;
}

static inline void tst_atomic_store(int i, int *v)
{
	asm volatile("" : : : "memory");
	*v = i;
	asm volatile("" : : : "memory");
}
#endif

static inline int tst_atomic_inc(int *v)
{
	return tst_atomic_add_return(1, v);
}

static inline int tst_atomic_dec(int *v)
{
	return tst_atomic_add_return(-1, v);
}

#endif	/* TST_ATOMIC_H__ */
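
/* Usage sketch for concern (3), LOAD-STORE ordering (illustrative only;
 * producer() and consumer() are hypothetical, not part of the LTP API).
 * Because tst_atomic_store() and tst_atomic_load() order the surrounding
 * accesses (sequential consistency or full barriers, with the plain
 * load/store fallback only used on strongly ordered architectures), the
 * write to data is globally visible before any thread can observe flag == 1:
 *
 *	static int data;
 *	static int flag;
 *
 *	static void producer(void)
 *	{
 *		data = 42;
 *		tst_atomic_store(1, &flag);
 *	}
 *
 *	static void consumer(void)
 *	{
 *		while (!tst_atomic_load(&flag))
 *			;
 *		// data is guaranteed to read as 42 here
 *	}
 */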