1 2 /* This is an example of a program which does atomic memory operations 3 between two processes which share a page. Valgrind 3.4.1 and 4 earlier produce incorrect answers because it does not preserve 5 atomicity of the relevant instructions in the generated code; but 6 the post-DCAS-merge versions of Valgrind do behave correctly. */ 7 8 #include <stdlib.h> 9 #include <stdio.h> 10 #include <string.h> 11 #include <assert.h> 12 #include <unistd.h> 13 #include <sys/wait.h> 14 #include "tests/sys_mman.h" 15 16 #define NNN 3456987 17 18 #define IS_8_ALIGNED(_ptr) (0 == (((unsigned long)(_ptr)) & 7)) 19 20 21 __attribute__((noinline)) void atomic_add_8bit ( char* p, int n ) 22 { 23 #if defined(VGA_x86) 24 unsigned long block[2]; 25 block[0] = (unsigned long)p; 26 block[1] = n; 27 __asm__ __volatile__( 28 "movl 0(%%esi),%%eax" "\n\t" 29 "movl 4(%%esi),%%ebx" "\n\t" 30 "lock; addb %%bl,(%%eax)" "\n" 31 : : "S"(&block[0])/* S means "esi only" */ : "memory","cc","eax","ebx" 32 ); 33 #elif defined(VGA_amd64) 34 unsigned long block[2]; 35 block[0] = (unsigned long)p; 36 block[1] = n; 37 __asm__ __volatile__( 38 "movq 0(%%rsi),%%rax" "\n\t" 39 "movq 8(%%rsi),%%rbx" "\n\t" 40 "lock; addb %%bl,(%%rax)" "\n" 41 : : "S"(&block[0])/* S means "rsi only" */ : "memory","cc","rax","rbx" 42 ); 43 #elif defined(VGA_ppc32) 44 /* Nasty hack. Does correctly atomically do *p += n, but only if p 45 is 4-aligned -- guaranteed by caller. */ 46 unsigned long success; 47 do { 48 __asm__ __volatile__( 49 "lwarx 15,0,%1" "\n\t" 50 "add 15,15,%2" "\n\t" 51 "stwcx. 15,0,%1" "\n\t" 52 "mfcr %0" "\n\t" 53 "srwi %0,%0,29" "\n\t" 54 "andi. %0,%0,1" "\n" 55 : /*out*/"=b"(success) 56 : /*in*/ "b"(p), "b"(((unsigned long)n) << 24) 57 : /*trash*/ "memory", "cc", "r15" 58 ); 59 } while (success != 1); 60 #elif defined(VGA_ppc64) 61 /* Nasty hack. Does correctly atomically do *p += n, but only if p 62 is 8-aligned -- guaranteed by caller. */ 63 unsigned long success; 64 do { 65 __asm__ __volatile__( 66 "ldarx 15,0,%1" "\n\t" 67 "add 15,15,%2" "\n\t" 68 "stdcx. 15,0,%1" "\n\t" 69 "mfcr %0" "\n\t" 70 "srwi %0,%0,29" "\n\t" 71 "andi. %0,%0,1" "\n" 72 : /*out*/"=b"(success) 73 : /*in*/ "b"(p), "b"(((unsigned long)n) << 56) 74 : /*trash*/ "memory", "cc", "r15" 75 ); 76 } while (success != 1); 77 #elif defined(VGA_arm) 78 *p += n; 79 #else 80 # error "Unsupported arch" 81 #endif 82 } 83 84 85 __attribute__((noinline)) void atomic_add_16bit ( short* p, int n ) 86 { 87 #if defined(VGA_x86) 88 unsigned long block[2]; 89 block[0] = (unsigned long)p; 90 block[1] = n; 91 __asm__ __volatile__( 92 "movl 0(%%esi),%%eax" "\n\t" 93 "movl 4(%%esi),%%ebx" "\n\t" 94 "lock; addw %%bx,(%%eax)" "\n" 95 : : "S"(&block[0])/* S means "esi only" */ : "memory","cc","eax","ebx" 96 ); 97 #elif defined(VGA_amd64) 98 unsigned long block[2]; 99 block[0] = (unsigned long)p; 100 block[1] = n; 101 __asm__ __volatile__( 102 "movq 0(%%rsi),%%rax" "\n\t" 103 "movq 8(%%rsi),%%rbx" "\n\t" 104 "lock; addw %%bx,(%%rax)" "\n" 105 : : "S"(&block[0])/* S means "rsi only" */ : "memory","cc","rax","rbx" 106 ); 107 #elif defined(VGA_ppc32) 108 /* Nasty hack. Does correctly atomically do *p += n, but only if p 109 is 8-aligned -- guaranteed by caller. */ 110 unsigned long success; 111 do { 112 __asm__ __volatile__( 113 "lwarx 15,0,%1" "\n\t" 114 "add 15,15,%2" "\n\t" 115 "stwcx. 15,0,%1" "\n\t" 116 "mfcr %0" "\n\t" 117 "srwi %0,%0,29" "\n\t" 118 "andi. %0,%0,1" "\n" 119 : /*out*/"=b"(success) 120 : /*in*/ "b"(p), "b"(((unsigned long)n) << 16) 121 : /*trash*/ "memory", "cc", "r15" 122 ); 123 } while (success != 1); 124 #elif defined(VGA_ppc64) 125 /* Nasty hack. Does correctly atomically do *p += n, but only if p 126 is 8-aligned -- guaranteed by caller. */ 127 unsigned long success; 128 do { 129 __asm__ __volatile__( 130 "ldarx 15,0,%1" "\n\t" 131 "add 15,15,%2" "\n\t" 132 "stdcx. 15,0,%1" "\n\t" 133 "mfcr %0" "\n\t" 134 "srwi %0,%0,29" "\n\t" 135 "andi. %0,%0,1" "\n" 136 : /*out*/"=b"(success) 137 : /*in*/ "b"(p), "b"(((unsigned long)n) << 48) 138 : /*trash*/ "memory", "cc", "r15" 139 ); 140 } while (success != 1); 141 #elif defined(VGA_arm) 142 *p += n; 143 #else 144 # error "Unsupported arch" 145 #endif 146 } 147 148 __attribute__((noinline)) void atomic_add_32bit ( int* p, int n ) 149 { 150 #if defined(VGA_x86) 151 unsigned long block[2]; 152 block[0] = (unsigned long)p; 153 block[1] = n; 154 __asm__ __volatile__( 155 "movl 0(%%esi),%%eax" "\n\t" 156 "movl 4(%%esi),%%ebx" "\n\t" 157 "lock; addl %%ebx,(%%eax)" "\n" 158 : : "S"(&block[0])/* S means "esi only" */ : "memory","cc","eax","ebx" 159 ); 160 #elif defined(VGA_amd64) 161 unsigned long block[2]; 162 block[0] = (unsigned long)p; 163 block[1] = n; 164 __asm__ __volatile__( 165 "movq 0(%%rsi),%%rax" "\n\t" 166 "movq 8(%%rsi),%%rbx" "\n\t" 167 "lock; addl %%ebx,(%%rax)" "\n" 168 : : "S"(&block[0])/* S means "rsi only" */ : "memory","cc","rax","rbx" 169 ); 170 #elif defined(VGA_ppc32) 171 unsigned long success; 172 do { 173 __asm__ __volatile__( 174 "lwarx 15,0,%1" "\n\t" 175 "add 15,15,%2" "\n\t" 176 "stwcx. 15,0,%1" "\n\t" 177 "mfcr %0" "\n\t" 178 "srwi %0,%0,29" "\n\t" 179 "andi. %0,%0,1" "\n" 180 : /*out*/"=b"(success) 181 : /*in*/ "b"(p), "b"(n) 182 : /*trash*/ "memory", "cc", "r15" 183 ); 184 } while (success != 1); 185 #elif defined(VGA_ppc64) 186 /* Nasty hack. Does correctly atomically do *p += n, but only if p 187 is 8-aligned -- guaranteed by caller. */ 188 unsigned long success; 189 do { 190 __asm__ __volatile__( 191 "ldarx 15,0,%1" "\n\t" 192 "add 15,15,%2" "\n\t" 193 "stdcx. 15,0,%1" "\n\t" 194 "mfcr %0" "\n\t" 195 "srwi %0,%0,29" "\n\t" 196 "andi. %0,%0,1" "\n" 197 : /*out*/"=b"(success) 198 : /*in*/ "b"(p), "b"(((unsigned long)n) << 32) 199 : /*trash*/ "memory", "cc", "r15" 200 ); 201 } while (success != 1); 202 #elif defined(VGA_arm) 203 unsigned int block[3] 204 = { (unsigned int)p, (unsigned int)n, 0xFFFFFFFF }; 205 do { 206 __asm__ __volatile__( 207 "mov r5, %0" "\n\t" 208 "ldr r9, [r5, #0]" "\n\t" // p 209 "ldr r10, [r5, #4]" "\n\t" // n 210 "ldrex r8, [r9]" "\n\t" 211 "add r8, r8, r10" "\n\t" 212 "strex r11, r8, [r9]" "\n\t" 213 "str r11, [r5, #8]" "\n\t" 214 : /*out*/ 215 : /*in*/ "r"(&block[0]) 216 : /*trash*/ "memory", "cc", "r5", "r8", "r9", "r10" 217 ); 218 } while (block[2] != 0); 219 #else 220 # error "Unsupported arch" 221 #endif 222 } 223 224 __attribute__((noinline)) void atomic_add_64bit ( long long int* p, int n ) 225 { 226 #if defined(VGA_x86) || defined(VGA_ppc32) || defined(VGA_arm) 227 /* do nothing; is not supported */ 228 #elif defined(VGA_amd64) 229 // this is a bit subtle. It relies on the fact that, on a 64-bit platform, 230 // sizeof(unsigned long long int) == sizeof(unsigned long) == sizeof(void*) 231 unsigned long long int block[2]; 232 block[0] = (unsigned long long int)(unsigned long)p; 233 block[1] = n; 234 __asm__ __volatile__( 235 "movq 0(%%rsi),%%rax" "\n\t" 236 "movq 8(%%rsi),%%rbx" "\n\t" 237 "lock; addq %%rbx,(%%rax)" "\n" 238 : : "S"(&block[0])/* S means "rsi only" */ : "memory","cc","rax","rbx" 239 ); 240 #elif defined(VGA_ppc64) 241 unsigned long success; 242 do { 243 __asm__ __volatile__( 244 "ldarx 15,0,%1" "\n\t" 245 "add 15,15,%2" "\n\t" 246 "stdcx. 15,0,%1" "\n\t" 247 "mfcr %0" "\n\t" 248 "srwi %0,%0,29" "\n\t" 249 "andi. %0,%0,1" "\n" 250 : /*out*/"=b"(success) 251 : /*in*/ "b"(p), "b"(n) 252 : /*trash*/ "memory", "cc", "r15" 253 ); 254 } while (success != 1); 255 #else 256 # error "Unsupported arch" 257 #endif 258 } 259 260 int main ( int argc, char** argv ) 261 { 262 int i, status; 263 char* page; 264 char* p8; 265 short* p16; 266 int* p32; 267 long long int* p64; 268 pid_t child, p2; 269 270 printf("parent, pre-fork\n"); 271 272 page = mmap( 0, sysconf(_SC_PAGESIZE), 273 PROT_READ|PROT_WRITE, 274 MAP_ANONYMOUS|MAP_SHARED, -1, 0 ); 275 if (page == MAP_FAILED) { 276 perror("mmap failed"); 277 exit(1); 278 } 279 280 p8 = (char*)(page+0); 281 p16 = (short*)(page+256); 282 p32 = (int*)(page+512); 283 p64 = (long long int*)(page+768); 284 285 assert( IS_8_ALIGNED(p8) ); 286 assert( IS_8_ALIGNED(p16) ); 287 assert( IS_8_ALIGNED(p32) ); 288 assert( IS_8_ALIGNED(p64) ); 289 290 memset(page, 0, 1024); 291 292 *p8 = 0; 293 *p16 = 0; 294 *p32 = 0; 295 *p64 = 0; 296 297 child = fork(); 298 if (child == -1) { 299 perror("fork() failed\n"); 300 return 1; 301 } 302 303 if (child == 0) { 304 /* --- CHILD --- */ 305 printf("child\n"); 306 for (i = 0; i < NNN; i++) { 307 atomic_add_8bit(p8, 1); 308 atomic_add_16bit(p16, 1); 309 atomic_add_32bit(p32, 1); 310 atomic_add_64bit(p64, 98765 ); /* ensure we hit the upper 32 bits */ 311 } 312 return 1; 313 /* NOTREACHED */ 314 315 } 316 317 /* --- PARENT --- */ 318 319 printf("parent\n"); 320 321 for (i = 0; i < NNN; i++) { 322 atomic_add_8bit(p8, 1); 323 atomic_add_16bit(p16, 1); 324 atomic_add_32bit(p32, 1); 325 atomic_add_64bit(p64, 98765 ); /* ensure we hit the upper 32 bits */ 326 } 327 328 p2 = waitpid(child, &status, 0); 329 assert(p2 == child); 330 331 /* assert that child finished normally */ 332 assert(WIFEXITED(status)); 333 334 printf("FINAL VALUES: 8 bit %d, 16 bit %d, 32 bit %d, 64 bit %lld\n", 335 (int)(*(signed char*)p8), (int)(*p16), *p32, *p64 ); 336 337 if (-74 == (int)(*(signed char*)p8) 338 && 32694 == (int)(*p16) 339 && 6913974 == *p32 340 && (0LL == *p64 || 682858642110LL == *p64)) { 341 printf("PASS\n"); 342 } else { 343 printf("FAIL -- see source code for expected values\n"); 344 } 345 346 printf("parent exits\n"); 347 348 return 0; 349 } 350