/* This is an example of a program which does atomic memory operations
   between two processes which share a page.  Valgrind 3.4.1 and
   earlier produce incorrect answers because it does not preserve
   atomicity of the relevant instructions in the generated code; but
   the post-DCAS-merge versions of Valgrind do behave correctly. */

/* On ARM, this can be compiled into either ARM or Thumb code, so as
   to test both A and T encodings of LDREX/STREX et al.  Also on ARM,
   it tests doubleword atomics (LDREXD, STREXD) which I don't think it
   does on any other platform. */

#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <assert.h>
#include <unistd.h>
#include <sys/wait.h>
#include "tests/sys_mman.h"

/* Iteration count for each of the two processes.  Large enough that
   the 8- and 16-bit counters wrap several times, so any lost
   (non-atomic) update is very likely to perturb the final totals. */
#define NNN 3456987

/* True iff _ptr is 8-aligned.  Several of the asm fragments below
   (notably the ppc64 doubleword LL/SC hacks) rely on the caller
   guaranteeing this for every counter. */
#define IS_8_ALIGNED(_ptr)   (0 == (((unsigned long)(_ptr)) & 7))


/* Atomically do *p += n for a one-byte counter, using a
   per-architecture inline-asm sequence.  Only the low 8 bits of n
   matter. */
__attribute__((noinline)) void atomic_add_8bit ( char* p, int n )
{
#if defined(VGA_x86)
   /* Load p and n from a two-word block via %esi, then a single
      locked byte add does the atomic update. */
   unsigned long block[2];
   block[0] = (unsigned long)p;
   block[1] = n;
   __asm__ __volatile__(
      "movl 0(%%esi),%%eax"      "\n\t"
      "movl 4(%%esi),%%ebx"      "\n\t"
      "lock; addb %%bl,(%%eax)"  "\n"
      : : "S"(&block[0])/* S means "esi only" */ : "memory","cc","eax","ebx"
   );
#elif defined(VGA_amd64)
   unsigned long block[2];
   block[0] = (unsigned long)p;
   block[1] = n;
   __asm__ __volatile__(
      "movq 0(%%rsi),%%rax"      "\n\t"
      "movq 8(%%rsi),%%rbx"      "\n\t"
      "lock; addb %%bl,(%%rax)"  "\n"
      : : "S"(&block[0])/* S means "rsi only" */ : "memory","cc","rax","rbx"
   );
#elif defined(VGA_ppc32)
   /* Nasty hack.  Does correctly atomically do *p += n, but only if p
      is 4-aligned -- guaranteed by caller. */
   /* Word-sized LL/SC loop; n is shifted up 24 bits so the add lands
      in the most-significant byte, which on this big-endian layout is
      the byte at address p.  The mfcr/srwi/andi. sequence extracts
      CR0.EQ (stwcx. success) into 'success'. */
   unsigned long success;
   do {
      __asm__ __volatile__(
         "lwarx  15,0,%1"    "\n\t"
         "add    15,15,%2"   "\n\t"
         "stwcx. 15,0,%1"    "\n\t"
         "mfcr   %0"         "\n\t"
         "srwi   %0,%0,29"   "\n\t"
         "andi.  %0,%0,1"    "\n"
         : /*out*/"=b"(success)
         : /*in*/ "b"(p), "b"(((unsigned long)n) << 24)
         : /*trash*/ "memory", "cc", "r15"
      );
   } while (success != 1);
#elif defined(VGA_ppc64be)
   /* Nasty hack.  Does correctly atomically do *p += n, but only if p
      is 8-aligned -- guaranteed by caller. */
   /* Doubleword LL/SC; the << 56 places n in the big-endian byte at
      address p. */
   unsigned long success;
   do {
      __asm__ __volatile__(
         "ldarx  15,0,%1"    "\n\t"
         "add    15,15,%2"   "\n\t"
         "stdcx. 15,0,%1"    "\n\t"
         "mfcr   %0"         "\n\t"
         "srwi   %0,%0,29"   "\n\t"
         "andi.  %0,%0,1"    "\n"
         : /*out*/"=b"(success)
         : /*in*/ "b"(p), "b"(((unsigned long)n) << 56)
         : /*trash*/ "memory", "cc", "r15"
      );
   } while (success != 1);
#elif defined(VGA_ppc64le)
   /* Nasty hack.  Does correctly atomically do *p += n, but only if p
      is 8-aligned -- guaranteed by caller. */
   /* Little-endian: the low-order byte is already the byte at address
      p, so no shift of n is needed. */
   unsigned long success;
   do {
      __asm__ __volatile__(
         "ldarx  15,0,%1"    "\n\t"
         "add    15,15,%2"   "\n\t"
         "stdcx. 15,0,%1"    "\n\t"
         "mfcr   %0"         "\n\t"
         "srwi   %0,%0,29"   "\n\t"
         "andi.  %0,%0,1"    "\n"
         : /*out*/"=b"(success)
         : /*in*/ "b"(p), "b"(((unsigned long)n))
         : /*trash*/ "memory", "cc", "r15"
      );
   } while (success != 1);
#elif defined(VGA_arm)
   /* block = { p, n, strex status }; loop until strexb reports
      success (0). */
   unsigned int block[3]
      = { (unsigned int)p, (unsigned int)n, 0xFFFFFFFF };
   do {
      __asm__ __volatile__(
         "mov    r5, %0"         "\n\t"
         "ldr    r9, [r5, #0]"   "\n\t" // p
         "ldr    r10, [r5, #4]"  "\n\t" // n
         "ldrexb r8, [r9]"       "\n\t"
         "add    r8, r8, r10"    "\n\t"
         "strexb r4, r8, [r9]"   "\n\t"
         "str    r4, [r5, #8]"   "\n\t"
         : /*out*/
         : /*in*/ "r"(&block[0])
         : /*trash*/ "memory", "cc", "r5", "r8", "r9", "r10", "r4"
      );
   } while (block[2] != 0);
#elif defined(VGA_arm64)
   /* Same scheme as VGA_arm, with 64-bit block slots and
      ldxrb/stxrb. */
   unsigned long long int block[3]
      = { (unsigned long long int)p, (unsigned long long int)n,
          0xFFFFFFFFFFFFFFFFULL};
   do {
      __asm__ __volatile__(
         "mov   x5, %0"         "\n\t"
         "ldr   x9, [x5, #0]"   "\n\t" // p
         "ldr   x10, [x5, #8]"  "\n\t" // n
         "ldxrb w8, [x9]"       "\n\t"
         "add   x8, x8, x10"    "\n\t"
         "stxrb w4, w8, [x9]"   "\n\t"
         "str   x4, [x5, #16]"  "\n\t"
         : /*out*/
         : /*in*/ "r"(&block[0])
         : /*trash*/ "memory", "cc", "x5", "x8", "x9", "x10", "x4"
      );
   } while (block[2] != 0);
#elif defined(VGA_s390x)
   /* Compare-and-swap loop: build the updated word in 'dummy' (only
      byte 0 is replaced, via icm/stcm with mask 1), then cs retries
      until no other writer intervened. */
   int dummy;
   __asm__ __volatile__(
      "   l	0,%0\n\t"
      "0: st	0,%1\n\t"
      "   icm	1,1,%1\n\t"
      "   ar	1,%2\n\t"
      "   stcm	1,1,%1\n\t"
      "   l	1,%1\n\t"
      "   cs	0,1,%0\n\t"
      "   jl	0b\n\t"
      : "+m" (*p), "+m" (dummy)
      : "d" (n)
      : "cc", "memory", "0", "1");
#elif defined(VGA_mips32)
   /* We rely on the fact that p is 4-aligned. Otherwise 'll' may throw an
      exception that can cause this function to fail. */
#if defined (_MIPSEL)
   /* Little-endian: splice the updated low byte back into the word
      inside an ll/sc loop; block[2] receives the sc status (1 on
      success). */
   unsigned int block[3]
      = { (unsigned int)p, (unsigned int)n, 0x0 };
   do {
      __asm__ __volatile__(
         "move $t0, %0"           "\n\t"
         "lw   $t1, 0($t0)"       "\n\t"  // p
         "lw   $t2, 4($t0)"       "\n\t"  // n
         "andi $t2, $t2, 0xFF"    "\n\t"  // n = n and 0xFF
         "li   $t4, 0xFF"         "\n\t"
         "nor  $t4, $t4, $zero"   "\n\t"  // $t4 = 0xFFFFFF00
         "ll   $t3, 0($t1)"       "\n\t"  // $t3 = old value
         "and  $t4, $t4, $t3"     "\n\t"  // $t4 = $t3 and 0xFFFFFF00
         "addu $t3, $t3, $t2"     "\n\t"  // $t3 = $t3 + n
         "andi $t3, $t3, 0xFF"    "\n\t"  // $t3 = $t3 and 0xFF
         "or   $t3, $t3, $t4"     "\n\t"  // $t3 = $t3 or $t4
         "sc   $t3, 0($t1)"       "\n\t"
         "sw   $t3, 8($t0)"       "\n\t"  // save result
         : /*out*/
         : /*in*/ "r"(&block[0])
         : /*trash*/ "memory", "t0", "t1", "t2", "t3", "t4"
      );
   } while (block[2] != 1);
#elif defined (_MIPSEB)
   /* Big-endian: shift n into the top byte and let the word-wide add
      update the byte at address p directly. */
   unsigned int block[3]
      = { (unsigned int)p, (unsigned int)n << 24, 0x0 };
   do {
      __asm__ __volatile__(
         "move $t0, %0"        "\n\t"
         "lw   $t1, 0($t0)"    "\n\t"  // p
         "lw   $t2, 4($t0)"    "\n\t"  // n
         "ll   $t3, 0($t1)"    "\n\t"
         "addu $t3, $t3, $t2"  "\n\t"
         "sc   $t3, 0($t1)"    "\n\t"
         "sw   $t3, 8($t0)"    "\n\t"
         : /*out*/
         : /*in*/ "r"(&block[0])
         : /*trash*/ "memory", "t0", "t1", "t2", "t3"
      );
   } while (block[2] != 1);
#endif
#elif defined(VGA_mips64)
   /* We rely on the fact that p is 4-aligned. Otherwise 'll' may throw an
      exception that can cause this function to fail. */
#if defined (_MIPSEL)
   unsigned long block[3]
      = { (unsigned long)p, (unsigned long)n, 0x0ULL };
   do {
      __asm__ __volatile__(
         "move $t0, %0"           "\n\t"
         "ld   $t1, 0($t0)"       "\n\t"  // p
         "ld   $t2, 8($t0)"       "\n\t"  // n
         "andi $t2, $t2, 0xFF"    "\n\t"  // n = n and 0xFF
         "li   $s0, 0xFF"         "\n\t"
         "nor  $s0, $s0, $zero"   "\n\t"  // $s0 = 0xFFFFFF00
         "ll   $t3, 0($t1)"       "\n\t"  // $t3 = old value
         "and  $s0, $s0, $t3"     "\n\t"  // $s0 = $t3 and 0xFFFFFF00
         "addu $t3, $t3, $t2"     "\n\t"  // $t3 = $t3 + n
         "andi $t3, $t3, 0xFF"    "\n\t"  // $t3 = $t3 and 0xFF
         "or   $t3, $t3, $s0"     "\n\t"  // $t3 = $t3 or $s0
         "sc   $t3, 0($t1)"       "\n\t"
         "sw   $t3, 16($t0)"      "\n\t"  // save result
         : /*out*/
         : /*in*/ "r"(&block[0])
         : /*trash*/ "memory", "t0", "t1", "t2", "t3", "s0"
      );
   } while (block[2] != 1);
#elif defined (_MIPSEB)
   /* Big-endian: doubleword lld/scd with n shifted into the top
      byte. */
   unsigned long block[3]
      = { (unsigned long)p, (unsigned long)n << 56, 0x0 };
   do {
      __asm__ __volatile__(
         "move  $t0, %0"        "\n\t"
         "ld    $t1, 0($t0)"    "\n\t"  // p
         "ld    $t2, 8($t0)"    "\n\t"  // n
         "lld   $t3, 0($t1)"    "\n\t"
         "daddu $t3, $t3, $t2"  "\n\t"
         "scd   $t3, 0($t1)"    "\n\t"
         "sd    $t3, 16($t0)"   "\n\t"
         : /*out*/
         : /*in*/ "r"(&block[0])
         : /*trash*/ "memory", "t0", "t1", "t2", "t3"
      );
   } while (block[2] != 1);
#endif
#elif defined(VGA_tilegx)
   /* Word-wide compare-exchange loop on the containing 4-aligned
      word.  NOTE(review): the byte-lane shift uses ((int)p & 3)
      directly rather than ((int)p & 3) * 8, and p4 rounds p up; both
      are only correct when p is 4-aligned, which this test's callers
      guarantee -- confirm before reusing elsewhere. */
   int i;
   unsigned int *p4 = (unsigned int *)(((unsigned long long)p + 3) & (~3ULL));
   unsigned int mask = (0xff) << ((int)p & 3);
   unsigned int add = (n & 0xff) << ((int)p & 3);
   unsigned int x, new;

   while(1) {
      x = *p4;
      new = (x & (~mask)) | ((x + add) & mask);
      __insn_mtspr(0x2780, x);
      if ( __insn_cmpexch4(p4, new) == x)
         break;
   }
#else
# error "Unsupported arch"
#endif
}


/* Atomically do *p += n for a two-byte counter. */
__attribute__((noinline)) void
atomic_add_16bit ( short* p, int n )
{
#if defined(VGA_x86)
   /* Single locked 16-bit add; p and n are fetched from 'block' via
      %esi. */
   unsigned long block[2];
   block[0] = (unsigned long)p;
   block[1] = n;
   __asm__ __volatile__(
      "movl 0(%%esi),%%eax"      "\n\t"
      "movl 4(%%esi),%%ebx"      "\n\t"
      "lock; addw %%bx,(%%eax)"  "\n"
      : : "S"(&block[0])/* S means "esi only" */ : "memory","cc","eax","ebx"
   );
#elif defined(VGA_amd64)
   unsigned long block[2];
   block[0] = (unsigned long)p;
   block[1] = n;
   __asm__ __volatile__(
      "movq 0(%%rsi),%%rax"      "\n\t"
      "movq 8(%%rsi),%%rbx"      "\n\t"
      "lock; addw %%bx,(%%rax)"  "\n"
      : : "S"(&block[0])/* S means "rsi only" */ : "memory","cc","rax","rbx"
   );
#elif defined(VGA_ppc32)
   /* Nasty hack.  Does correctly atomically do *p += n, but only if p
      is 8-aligned -- guaranteed by caller. */
   /* Word-sized LL/SC; n << 16 targets the big-endian halfword at
      address p.  CR0.EQ (stwcx. success) is extracted into
      'success'. */
   unsigned long success;
   do {
      __asm__ __volatile__(
         "lwarx  15,0,%1"    "\n\t"
         "add    15,15,%2"   "\n\t"
         "stwcx. 15,0,%1"    "\n\t"
         "mfcr   %0"         "\n\t"
         "srwi   %0,%0,29"   "\n\t"
         "andi.  %0,%0,1"    "\n"
         : /*out*/"=b"(success)
         : /*in*/ "b"(p), "b"(((unsigned long)n) << 16)
         : /*trash*/ "memory", "cc", "r15"
      );
   } while (success != 1);
#elif defined(VGA_ppc64be)
   /* Nasty hack.  Does correctly atomically do *p += n, but only if p
      is 8-aligned -- guaranteed by caller. */
   unsigned long success;
   do {
      __asm__ __volatile__(
         "ldarx  15,0,%1"    "\n\t"
         "add    15,15,%2"   "\n\t"
         "stdcx. 15,0,%1"    "\n\t"
         "mfcr   %0"         "\n\t"
         "srwi   %0,%0,29"   "\n\t"
         "andi.  %0,%0,1"    "\n"
         : /*out*/"=b"(success)
         : /*in*/ "b"(p), "b"(((unsigned long)n) << 48)
         : /*trash*/ "memory", "cc", "r15"
      );
   } while (success != 1);
#elif defined(VGA_ppc64le)
   /* Nasty hack.  Does correctly atomically do *p += n, but only if p
      is 8-aligned -- guaranteed by caller. */
   /* Little-endian: the low halfword is the one at address p, so n is
      used unshifted. */
   unsigned long success;
   do {
      __asm__ __volatile__(
         "ldarx  15,0,%1"    "\n\t"
         "add    15,15,%2"   "\n\t"
         "stdcx. 15,0,%1"    "\n\t"
         "mfcr   %0"         "\n\t"
         "srwi   %0,%0,29"   "\n\t"
         "andi.  %0,%0,1"    "\n"
         : /*out*/"=b"(success)
         : /*in*/ "b"(p), "b"(((unsigned long)n))
         : /*trash*/ "memory", "cc", "r15"
      );
   } while (success != 1);
#elif defined(VGA_arm)
   /* block = { p, n, strex status }; loop until strexh reports
      success (0). */
   unsigned int block[3]
      = { (unsigned int)p, (unsigned int)n, 0xFFFFFFFF };
   do {
      __asm__ __volatile__(
         "mov    r5, %0"         "\n\t"
         "ldr    r9, [r5, #0]"   "\n\t" // p
         "ldr    r10, [r5, #4]"  "\n\t" // n
         "ldrexh r8, [r9]"       "\n\t"
         "add    r8, r8, r10"    "\n\t"
         "strexh r4, r8, [r9]"   "\n\t"
         "str    r4, [r5, #8]"   "\n\t"
         : /*out*/
         : /*in*/ "r"(&block[0])
         : /*trash*/ "memory", "cc", "r5", "r8", "r9", "r10", "r4"
      );
   } while (block[2] != 0);
#elif defined(VGA_arm64)
   unsigned long long int block[3]
      = { (unsigned long long int)p, (unsigned long long int)n,
          0xFFFFFFFFFFFFFFFFULL};
   do {
      __asm__ __volatile__(
         "mov   x5, %0"         "\n\t"
         "ldr   x9, [x5, #0]"   "\n\t" // p
         "ldr   x10, [x5, #8]"  "\n\t" // n
         "ldxrh w8, [x9]"       "\n\t"
         "add   x8, x8, x10"    "\n\t"
         "stxrh w4, w8, [x9]"   "\n\t"
         "str   x4, [x5, #16]"  "\n\t"
         : /*out*/
         : /*in*/ "r"(&block[0])
         : /*trash*/ "memory", "cc", "x5", "x8", "x9", "x10", "x4"
      );
   } while (block[2] != 0);
#elif defined(VGA_s390x)
   /* Compare-and-swap loop; icm/stcm mask 3 selects the top two
      bytes, i.e. the halfword being updated. */
   int dummy;
   __asm__ __volatile__(
      "   l	0,%0\n\t"
      "0: st	0,%1\n\t"
      "   icm	1,3,%1\n\t"
      "   ar	1,%2\n\t"
      "   stcm	1,3,%1\n\t"
      "   l	1,%1\n\t"
      "   cs	0,1,%0\n\t"
      "   jl	0b\n\t"
      : "+m" (*p), "+m" (dummy)
      : "d" (n)
      : "cc", "memory", "0", "1");
#elif defined(VGA_mips32)
   /* We rely on the fact that p is 4-aligned. Otherwise 'll' may throw an
      exception that can cause this function to fail. */
#if defined (_MIPSEL)
   /* Little-endian: splice the updated low halfword back into the
      word inside an ll/sc loop; block[2] receives the sc status. */
   unsigned int block[3]
      = { (unsigned int)p, (unsigned int)n, 0x0 };
   do {
      __asm__ __volatile__(
         "move $t0, %0"           "\n\t"
         "lw   $t1, 0($t0)"       "\n\t"  // p
         "lw   $t2, 4($t0)"       "\n\t"  // n
         "andi $t2, $t2, 0xFFFF"  "\n\t"  // n = n and 0xFFFF
         "li   $t4, 0xFFFF"       "\n\t"
         "nor  $t4, $t4, $zero"   "\n\t"  // $t4 = 0xFFFF0000
         "ll   $t3, 0($t1)"       "\n\t"  // $t3 = old value
         "and  $t4, $t4, $t3"     "\n\t"  // $t4 = $t3 and 0xFFFF0000
         "addu $t3, $t3, $t2"     "\n\t"  // $t3 = $t3 + n
         "andi $t3, $t3, 0xFFFF"  "\n\t"  // $t3 = $t3 and 0xFFFF
         "or   $t3, $t3, $t4"     "\n\t"  // $t3 = $t3 or $t4
         "sc   $t3, 0($t1)"       "\n\t"
         "sw   $t3, 8($t0)"       "\n\t"  // save result
         : /*out*/
         : /*in*/ "r"(&block[0])
         : /*trash*/ "memory", "t0", "t1", "t2", "t3", "t4"
      );
   } while (block[2] != 1);
#elif defined (_MIPSEB)
   /* Big-endian: n shifted into the top halfword, word-wide ll/sc. */
   unsigned int block[3]
      = { (unsigned int)p, (unsigned int)n << 16, 0x0 };
   do {
      __asm__ __volatile__(
         "move $t0, %0"        "\n\t"
         "lw   $t1, 0($t0)"    "\n\t"  // p
         "lw   $t2, 4($t0)"    "\n\t"  // n
         "ll   $t3, 0($t1)"    "\n\t"
         "addu $t3, $t3, $t2"  "\n\t"
         "sc   $t3, 0($t1)"    "\n\t"
         "sw   $t3, 8($t0)"    "\n\t"
         : /*out*/
         : /*in*/ "r"(&block[0])
         : /*trash*/ "memory", "t0", "t1", "t2", "t3"
      );
   } while (block[2] != 1);
#endif
#elif defined(VGA_mips64)
   /* We rely on the fact that p is 4-aligned. Otherwise 'll' may throw an
      exception that can cause this function to fail. */
#if defined (_MIPSEL)
   unsigned long block[3]
      = { (unsigned long)p, (unsigned long)n, 0x0ULL };
   do {
      __asm__ __volatile__(
         "move $t0, %0"           "\n\t"
         "ld   $t1, 0($t0)"       "\n\t"  // p
         "ld   $t2, 8($t0)"       "\n\t"  // n
         "andi $t2, $t2, 0xFFFF"  "\n\t"  // n = n and 0xFFFF
         "li   $s0, 0xFFFF"       "\n\t"
         "nor  $s0, $s0, $zero"   "\n\t"  // $s0 = 0xFFFF0000
         "ll   $t3, 0($t1)"       "\n\t"  // $t3 = old value
         "and  $s0, $s0, $t3"     "\n\t"  // $s0 = $t3 and 0xFFFF0000
         "addu $t3, $t3, $t2"     "\n\t"  // $t3 = $t3 + n
         "andi $t3, $t3, 0xFFFF"  "\n\t"  // $t3 = $t3 and 0xFFFF
         "or   $t3, $t3, $s0"     "\n\t"  // $t3 = $t3 or $s0
         "sc   $t3, 0($t1)"       "\n\t"
         "sw   $t3, 16($t0)"      "\n\t"  // save result
         : /*out*/
         : /*in*/ "r"(&block[0])
         : /*trash*/ "memory", "t0", "t1", "t2", "t3", "s0"
      );
   } while (block[2] != 1);
#elif defined (_MIPSEB)
   /* Big-endian: doubleword lld/scd with n shifted into the top
      halfword. */
   unsigned long block[3]
      = { (unsigned long)p, (unsigned long)n << 48, 0x0 };
   do {
      __asm__ __volatile__(
         "move  $t0, %0"        "\n\t"
         "ld    $t1, 0($t0)"    "\n\t"  // p
         "ld    $t2, 8($t0)"    "\n\t"  // n
         "lld   $t3, 0($t1)"    "\n\t"
         "daddu $t3, $t3, $t2"  "\n\t"
         "scd   $t3, 0($t1)"    "\n\t"
         "sd    $t3, 16($t0)"   "\n\t"
         : /*out*/
         : /*in*/ "r"(&block[0])
         : /*trash*/ "memory", "t0", "t1", "t2", "t3"
      );
   } while (block[2] != 1);
#endif
#elif defined(VGA_tilegx)
   /* Word-wide compare-exchange loop on the containing 4-aligned
      word.  NOTE(review): same caveat as the 8-bit case -- the lane
      shift is ((int)p & 3), not ((int)p & 3) * 8, so this is only
      correct for the aligned pointers this test passes in. */
   int i;
   unsigned int *p4 = (unsigned int *)(((unsigned long long)p + 3) & (~3ULL));
   unsigned int mask = (0xffff) << ((int)p & 3);
   unsigned int add = (n & 0xffff) << ((int)p & 3);
   unsigned int x, new;

   while(1) {
      x = *p4;
      new = (x & (~mask)) | ((x + add) & mask);
      __insn_mtspr(0x2780, x);
      if ( __insn_cmpexch4(p4, new) == x)
         break;
   }
#else
# error "Unsupported arch"
#endif
}

/* Atomically do *p += n for a four-byte counter. */
__attribute__((noinline)) void atomic_add_32bit ( int* p, int n )
{
#if defined(VGA_x86)
   unsigned long block[2];
   block[0] = (unsigned long)p;
   block[1] = n;
   /* Single locked 32-bit add does the atomic update. */
   __asm__ __volatile__(
      "movl 0(%%esi),%%eax"       "\n\t"
      "movl 4(%%esi),%%ebx"       "\n\t"
      "lock; addl %%ebx,(%%eax)"  "\n"
      : : "S"(&block[0])/* S means "esi only" */ : "memory","cc","eax","ebx"
   );
#elif defined(VGA_amd64)
   unsigned long block[2];
   block[0] = (unsigned long)p;
   block[1] = n;
   __asm__ __volatile__(
      "movq 0(%%rsi),%%rax"       "\n\t"
      "movq 8(%%rsi),%%rbx"       "\n\t"
      "lock; addl %%ebx,(%%rax)"  "\n"
      : : "S"(&block[0])/* S means "rsi only" */ : "memory","cc","rax","rbx"
   );
#elif defined(VGA_ppc32)
   /* Natural word width here, so no shift of n is needed.  CR0.EQ
      (stwcx. success) is extracted into 'success'. */
   unsigned long success;
   do {
      __asm__ __volatile__(
         "lwarx  15,0,%1"    "\n\t"
         "add    15,15,%2"   "\n\t"
         "stwcx. 15,0,%1"    "\n\t"
         "mfcr   %0"         "\n\t"
         "srwi   %0,%0,29"   "\n\t"
         "andi.  %0,%0,1"    "\n"
         : /*out*/"=b"(success)
         : /*in*/ "b"(p), "b"(n)
         : /*trash*/ "memory", "cc", "r15"
      );
   } while (success != 1);
#elif defined(VGA_ppc64be)
   /* Nasty hack.  Does correctly atomically do *p += n, but only if p
      is 8-aligned -- guaranteed by caller. */
   /* Doubleword LL/SC; << 32 places n in the big-endian word at
      address p. */
   unsigned long success;
   do {
      __asm__ __volatile__(
         "ldarx  15,0,%1"    "\n\t"
         "add    15,15,%2"   "\n\t"
         "stdcx. 15,0,%1"    "\n\t"
         "mfcr   %0"         "\n\t"
         "srwi   %0,%0,29"   "\n\t"
         "andi.  %0,%0,1"    "\n"
         : /*out*/"=b"(success)
         : /*in*/ "b"(p), "b"(((unsigned long)n) << 32)
         : /*trash*/ "memory", "cc", "r15"
      );
   } while (success != 1);
#elif defined(VGA_ppc64le)
   /* Nasty hack.  Does correctly atomically do *p += n, but only if p
      is 8-aligned -- guaranteed by caller. */
   unsigned long success;
   do {
      __asm__ __volatile__(
         "ldarx  15,0,%1"    "\n\t"
         "add    15,15,%2"   "\n\t"
         "stdcx. 15,0,%1"    "\n\t"
         "mfcr   %0"         "\n\t"
         "srwi   %0,%0,29"   "\n\t"
         "andi.  %0,%0,1"    "\n"
         : /*out*/"=b"(success)
         : /*in*/ "b"(p), "b"(((unsigned long)n))
         : /*trash*/ "memory", "cc", "r15"
      );
   } while (success != 1);
#elif defined(VGA_arm)
   /* block = { p, n, strex status }; loop until strex reports
      success (0). */
   unsigned int block[3]
      = { (unsigned int)p, (unsigned int)n, 0xFFFFFFFF };
   do {
      __asm__ __volatile__(
         "mov   r5, %0"         "\n\t"
         "ldr   r9, [r5, #0]"   "\n\t" // p
         "ldr   r10, [r5, #4]"  "\n\t" // n
         "ldrex r8, [r9]"       "\n\t"
         "add   r8, r8, r10"    "\n\t"
         "strex r4, r8, [r9]"   "\n\t"
         "str   r4, [r5, #8]"   "\n\t"
         : /*out*/
         : /*in*/ "r"(&block[0])
         : /*trash*/ "memory", "cc", "r5", "r8", "r9", "r10", "r4"
      );
   } while (block[2] != 0);
#elif defined(VGA_arm64)
   unsigned long long int block[3]
      = { (unsigned long long int)p, (unsigned long long int)n,
          0xFFFFFFFFFFFFFFFFULL};
   do {
      __asm__ __volatile__(
         "mov  x5, %0"         "\n\t"
         "ldr  x9, [x5, #0]"   "\n\t" // p
         "ldr  x10, [x5, #8]"  "\n\t" // n
         "ldxr w8, [x9]"       "\n\t"
         "add  x8, x8, x10"    "\n\t"
         "stxr w4, w8, [x9]"   "\n\t"
         "str  x4, [x5, #16]"  "\n\t"
         : /*out*/
         : /*in*/ "r"(&block[0])
         : /*trash*/ "memory", "cc", "x5", "x8", "x9", "x10", "x4"
      );
   } while (block[2] != 0);
#elif defined(VGA_s390x)
   /* Whole-word update, so a plain load/add/compare-and-swap loop
      suffices -- no byte splicing needed. */
   __asm__ __volatile__(
      "   l	0,%0\n\t"
      "0: lr	1,0\n\t"
      "   ar	1,%1\n\t"
      "   cs	0,1,%0\n\t"
      "   jl	0b\n\t"
      : "+m" (*p)
      : "d" (n)
      : "cc", "memory", "0", "1");
#elif defined(VGA_mips32)
   /* Whole-word ll/sc loop; block[2] receives the sc status (1 on
      success). */
   unsigned int block[3]
      = { (unsigned int)p, (unsigned int)n, 0x0 };
   do {
      __asm__ __volatile__(
         "move $t0, %0"        "\n\t"
         "lw   $t1, 0($t0)"    "\n\t"  // p
         "lw   $t2, 4($t0)"    "\n\t"  // n
         "ll   $t3, 0($t1)"    "\n\t"
         "addu $t3, $t3, $t2"  "\n\t"
         "sc   $t3, 0($t1)"    "\n\t"
         "sw   $t3, 8($t0)"    "\n\t"
         : /*out*/
         : /*in*/ "r"(&block[0])
         : /*trash*/ "memory", "t0", "t1", "t2", "t3"
      );
   } while (block[2] != 1);
#elif defined(VGA_mips64)
   unsigned long block[3]
      = { (unsigned long)p, (unsigned long)n, 0x0ULL };
   do {
      __asm__ __volatile__(
         "move $t0, %0"        "\n\t"
         "ld   $t1, 0($t0)"    "\n\t"  // p
         "ld   $t2, 8($t0)"    "\n\t"  // n
         "ll   $t3, 0($t1)"    "\n\t"
         "addu $t3, $t3, $t2"  "\n\t"
         "sc   $t3, 0($t1)"    "\n\t"
         "sd   $t3, 16($t0)"   "\n\t"
         : /*out*/
         : /*in*/ "r"(&block[0])
         : /*trash*/ "memory", "t0", "t1", "t2", "t3"
      );
   } while (block[2] != 1);
#elif defined(VGA_tilegx)
   /* Native 4-byte fetch-and-add. */
   __insn_fetchadd4(p, n);
#else
# error "Unsupported arch"
#endif
}

/* Atomically do *p += n for an eight-byte counter.  Deliberately a
   no-op on 32-bit targets that have no doubleword atomics; main()
   accepts a final 64-bit value of 0 for that reason. */
__attribute__((noinline)) void atomic_add_64bit ( long long int* p, int n )
{
#if defined(VGA_x86) || defined(VGA_ppc32) || defined(VGA_mips32)
   /* do nothing; is not supported */
#elif defined(VGA_amd64)
   // this is a bit subtle. It relies on the fact that, on a 64-bit platform,
   // sizeof(unsigned long long int) == sizeof(unsigned long) == sizeof(void*)
   unsigned long long int block[2];
   block[0] = (unsigned long long int)(unsigned long)p;
   block[1] = n;
   __asm__ __volatile__(
      "movq 0(%%rsi),%%rax"       "\n\t"
      "movq 8(%%rsi),%%rbx"       "\n\t"
      "lock; addq %%rbx,(%%rax)"  "\n"
      : : "S"(&block[0])/* S means "rsi only" */ : "memory","cc","rax","rbx"
   );
#elif defined(VGA_ppc64be) || defined(VGA_ppc64le)
   unsigned long success;
   do {
      __asm__ __volatile__(
         "ldarx  15,0,%1"    "\n\t"
         "add    15,15,%2"   "\n\t"
         "stdcx. 15,0,%1"    "\n\t"
         "mfcr   %0"         "\n\t"
         "srwi   %0,%0,29"   "\n\t"
         "andi.  %0,%0,1"    "\n"
         : /*out*/"=b"(success)
         : /*in*/ "b"(p), "b"(n)
         : /*trash*/ "memory", "cc", "r15"
      );
   } while (success != 1);
#elif defined(VGA_arm)
   /* Doubleword ldrexd/strexd with a 64-bit add spread over
      adds/adc.  The final 'str r1' writes only the low 32 bits of
      block[2]; its high word keeps the 0xFFFFFFFF fill, hence the
      0xFFFFFFFF00000000 success value below (little-endian layout
      assumed). */
   unsigned long long int block[3]
      = { (unsigned long long int)(unsigned long)p,
          (unsigned long long int)n,
          0xFFFFFFFFFFFFFFFFULL };
   do {
      __asm__ __volatile__(
         "mov    r5, %0"             "\n\t"
         "ldr    r8, [r5, #0]"       "\n\t" // p
         "ldrd   r2, r3, [r5, #8]"   "\n\t" // n
         "ldrexd r0, r1, [r8]"       "\n\t"
         "adds   r2, r2, r0"         "\n\t"
         "adc    r3, r3, r1"         "\n\t"
         "strexd r1, r2, r3, [r8]"   "\n\t"
         "str    r1, [r5, #16]"      "\n\t"
         : /*out*/
         : /*in*/ "r"(&block[0])
         : /*trash*/ "memory", "cc", "r5", "r0", "r1", "r8", "r2", "r3"
      );
   } while (block[2] != 0xFFFFFFFF00000000ULL);
#elif defined(VGA_arm64)
   unsigned long long int block[3]
      = { (unsigned long long int)p, (unsigned long long int)n,
          0xFFFFFFFFFFFFFFFFULL};
   do {
      __asm__ __volatile__(
         "mov  x5, %0"         "\n\t"
         "ldr  x9, [x5, #0]"   "\n\t" // p
         "ldr  x10, [x5, #8]"  "\n\t" // n
         "ldxr x8, [x9]"       "\n\t"
         "add  x8, x8, x10"    "\n\t"
         "stxr w4, x8, [x9]"   "\n\t"
         "str  x4, [x5, #16]"  "\n\t"
         : /*out*/
         : /*in*/ "r"(&block[0])
         : /*trash*/ "memory", "cc", "x5", "x8", "x9", "x10", "x4"
      );
   } while (block[2] != 0);
#elif defined(VGA_s390x)
   /* 64-bit load/add/compare-and-swap loop (lg/agr/csg). */
   __asm__ __volatile__(
      "   lg	0,%0\n\t"
      "0: lgr	1,0\n\t"
      "   agr	1,%1\n\t"
      "   csg	0,1,%0\n\t"
      "   jl	0b\n\t"
      : "+m" (*p)
      : "d" (n)
      : "cc", "memory", "0", "1");
#elif defined(VGA_mips64)
   /* Doubleword lld/daddu/scd loop; block[2] receives the scd
      status. */
   unsigned long block[3]
      = { (unsigned long)p, (unsigned long)n, 0x0ULL };
   do {
      __asm__ __volatile__(
         "move  $t0, %0"        "\n\t"
         "ld    $t1, 0($t0)"    "\n\t"  // p
         "ld    $t2, 8($t0)"    "\n\t"  // n
         "lld   $t3, 0($t1)"    "\n\t"
         "daddu $t3, $t3, $t2"  "\n\t"
         "scd   $t3, 0($t1)"    "\n\t"
         "sd    $t3, 16($t0)"   "\n\t"
         : /*out*/
         : /*in*/ "r"(&block[0])
         : /*trash*/ "memory", "t0", "t1", "t2", "t3"
      );
   } while (block[2] != 1);
#elif defined(VGA_tilegx)
   /* Native 8-byte fetch-and-add. */
   __insn_fetchadd(p, n);
#else
# error "Unsupported arch"
#endif
}

/* Fork two processes that share one anonymous page and hammer four
   counters (8/16/32/64 bit) in it concurrently; if the tool preserves
   atomicity, the final totals are exactly 2*NNN increments each. */
int main ( int argc, char** argv )
{
   int    i, status;
   char*  page;
   char*  p8;
   short* p16;
   int*   p32;
   long long int* p64;
   pid_t  child, p2;

   printf("parent, pre-fork\n");

   /* MAP_SHARED so parent and child update the same physical page. */
   page = mmap( 0, sysconf(_SC_PAGESIZE),
                PROT_READ|PROT_WRITE,
                MAP_ANONYMOUS|MAP_SHARED, -1, 0 );
   if (page == MAP_FAILED) {
      perror("mmap failed");
      exit(1);
   }

   /* Counters spaced 256 bytes apart; all are therefore 8-aligned,
      as the asm hacks above require. */
   p8  = (char*)(page+0);
   p16 = (short*)(page+256);
   p32 = (int*)(page+512);
   p64 = (long long int*)(page+768);

   assert( IS_8_ALIGNED(p8) );
   assert( IS_8_ALIGNED(p16) );
   assert( IS_8_ALIGNED(p32) );
   assert( IS_8_ALIGNED(p64) );

   memset(page, 0, 1024);

   *p8  = 0;
   *p16 = 0;
   *p32 = 0;
   *p64 = 0;

   child = fork();
   if (child == -1) {
      perror("fork() failed\n");
      return 1;
   }

   if (child == 0) {
      /* --- CHILD --- */
      printf("child\n");
      for (i = 0; i < NNN; i++) {
         atomic_add_8bit(p8, 1);
         atomic_add_16bit(p16, 1);
         atomic_add_32bit(p32, 1);
         atomic_add_64bit(p64, 98765 ); /* ensure we hit the upper 32 bits */
      }
      return 1;
      /* NOTREACHED */

   }

   /* --- PARENT --- */

   printf("parent\n");

   for (i = 0; i < NNN; i++) {
      atomic_add_8bit(p8, 1);
      atomic_add_16bit(p16, 1);
      atomic_add_32bit(p32, 1);
      atomic_add_64bit(p64, 98765 ); /* ensure we hit the upper 32 bits */
   }

   p2 = waitpid(child, &status, 0);
   assert(p2 == child);

   /* assert that child finished normally */
   assert(WIFEXITED(status));

   printf("FINAL VALUES:  8 bit %d,  16 bit %d,  32 bit %d,  64 bit %lld\n",
          (int)(*(signed char*)p8), (int)(*p16), *p32, *p64 );

   /* Expected values: 2*NNN = 6913974 increments per counter, so
        8 bit:  6913974 mod 2^8  = 182, i.e. -74 as a signed char
        16 bit: 6913974 mod 2^16 = 32694
        32 bit: 6913974
        64 bit: 6913974 * 98765 = 682858642110, or 0 on targets where
                atomic_add_64bit is a no-op. */
   if (-74 == (int)(*(signed char*)p8)
       && 32694 == (int)(*p16)
       && 6913974 == *p32
       && (0LL == *p64 || 682858642110LL == *p64)) {
      printf("PASS\n");
   } else {
      /* Any lost update (broken atomicity) lands here. */
      printf("FAIL -- see source code for expected values\n");
   }

   printf("parent exits\n");

   return 0;
}