1 2 /* This is an example of a program which does atomic memory operations 3 between two processes which share a page. Valgrind 3.4.1 and 4 earlier produce incorrect answers because it does not preserve 5 atomicity of the relevant instructions in the generated code; but 6 the post-DCAS-merge versions of Valgrind do behave correctly. */ 7 8 /* On ARM, this can be compiled into either ARM or Thumb code, so as 9 to test both A and T encodings of LDREX/STREX et al. Also on ARM, 10 it tests doubleword atomics (LDREXD, STREXD) which I don't think it 11 does on any other platform. */ 12 13 #include <stdlib.h> 14 #include <stdio.h> 15 #include <string.h> 16 #include <assert.h> 17 #include <unistd.h> 18 #include <sys/wait.h> 19 #include "tests/sys_mman.h" 20 21 #define NNN 3456987 22 23 #define IS_8_ALIGNED(_ptr) (0 == (((unsigned long)(_ptr)) & 7)) 24 25 26 __attribute__((noinline)) void atomic_add_8bit ( char* p, int n ) 27 { 28 #if defined(VGA_x86) 29 unsigned long block[2]; 30 block[0] = (unsigned long)p; 31 block[1] = n; 32 __asm__ __volatile__( 33 "movl 0(%%esi),%%eax" "\n\t" 34 "movl 4(%%esi),%%ebx" "\n\t" 35 "lock; addb %%bl,(%%eax)" "\n" 36 : : "S"(&block[0])/* S means "esi only" */ : "memory","cc","eax","ebx" 37 ); 38 #elif defined(VGA_amd64) 39 unsigned long block[2]; 40 block[0] = (unsigned long)p; 41 block[1] = n; 42 __asm__ __volatile__( 43 "movq 0(%%rsi),%%rax" "\n\t" 44 "movq 8(%%rsi),%%rbx" "\n\t" 45 "lock; addb %%bl,(%%rax)" "\n" 46 : : "S"(&block[0])/* S means "rsi only" */ : "memory","cc","rax","rbx" 47 ); 48 #elif defined(VGA_ppc32) 49 /* Nasty hack. Does correctly atomically do *p += n, but only if p 50 is 4-aligned -- guaranteed by caller. */ 51 unsigned long success; 52 do { 53 __asm__ __volatile__( 54 "lwarx 15,0,%1" "\n\t" 55 "add 15,15,%2" "\n\t" 56 "stwcx. 15,0,%1" "\n\t" 57 "mfcr %0" "\n\t" 58 "srwi %0,%0,29" "\n\t" 59 "andi. %0,%0,1" "\n" 60 : /*out*/"=b"(success) 61 : /*in*/ "b"(p), "b"(((unsigned long)n) << 24) 62 : /*trash*/ "memory", "cc", "r15" 63 ); 64 } while (success != 1); 65 #elif defined(VGA_ppc64) 66 /* Nasty hack. Does correctly atomically do *p += n, but only if p 67 is 8-aligned -- guaranteed by caller. */ 68 unsigned long success; 69 do { 70 __asm__ __volatile__( 71 "ldarx 15,0,%1" "\n\t" 72 "add 15,15,%2" "\n\t" 73 "stdcx. 15,0,%1" "\n\t" 74 "mfcr %0" "\n\t" 75 "srwi %0,%0,29" "\n\t" 76 "andi. %0,%0,1" "\n" 77 : /*out*/"=b"(success) 78 : /*in*/ "b"(p), "b"(((unsigned long)n) << 56) 79 : /*trash*/ "memory", "cc", "r15" 80 ); 81 } while (success != 1); 82 #elif defined(VGA_arm) 83 unsigned int block[3] 84 = { (unsigned int)p, (unsigned int)n, 0xFFFFFFFF }; 85 do { 86 __asm__ __volatile__( 87 "mov r5, %0" "\n\t" 88 "ldr r9, [r5, #0]" "\n\t" // p 89 "ldr r10, [r5, #4]" "\n\t" // n 90 "ldrexb r8, [r9]" "\n\t" 91 "add r8, r8, r10" "\n\t" 92 "strexb r4, r8, [r9]" "\n\t" 93 "str r4, [r5, #8]" "\n\t" 94 : /*out*/ 95 : /*in*/ "r"(&block[0]) 96 : /*trash*/ "memory", "cc", "r5", "r8", "r9", "r10", "r4" 97 ); 98 } while (block[2] != 0); 99 #elif defined(VGA_arm64) 100 unsigned long long int block[3] 101 = { (unsigned long long int)p, (unsigned long long int)n, 102 0xFFFFFFFFFFFFFFFFULL}; 103 do { 104 __asm__ __volatile__( 105 "mov x5, %0" "\n\t" 106 "ldr x9, [x5, #0]" "\n\t" // p 107 "ldr x10, [x5, #8]" "\n\t" // n 108 "ldxrb w8, [x9]" "\n\t" 109 "add x8, x8, x10" "\n\t" 110 "stxrb w4, w8, [x9]" "\n\t" 111 "str x4, [x5, #16]" "\n\t" 112 : /*out*/ 113 : /*in*/ "r"(&block[0]) 114 : /*trash*/ "memory", "cc", "x5", "x8", "x9", "x10", "x4" 115 ); 116 } while (block[2] != 0); 117 #elif defined(VGA_s390x) 118 int dummy; 119 __asm__ __volatile__( 120 " l 0,%0\n\t" 121 "0: st 0,%1\n\t" 122 " icm 1,1,%1\n\t" 123 " ar 1,%2\n\t" 124 " stcm 1,1,%1\n\t" 125 " l 1,%1\n\t" 126 " cs 0,1,%0\n\t" 127 " jl 0b\n\t" 128 : "+m" (*p), "+m" (dummy) 129 : "d" (n) 130 : "cc", "memory", "0", "1"); 131 #elif defined(VGA_mips32) 132 /* We rely on the fact that p is 4-aligned. Otherwise 'll' may throw an 133 exception that can cause this function to fail. */ 134 #if defined (_MIPSEL) 135 unsigned int block[3] 136 = { (unsigned int)p, (unsigned int)n, 0x0 }; 137 do { 138 __asm__ __volatile__( 139 "move $t0, %0" "\n\t" 140 "lw $t1, 0($t0)" "\n\t" // p 141 "lw $t2, 4($t0)" "\n\t" // n 142 "andi $t2, $t2, 0xFF" "\n\t" // n = n and 0xFF 143 "li $t4, 0xFF" "\n\t" 144 "nor $t4, $t4, $zero" "\n\t" // $t4 = 0xFFFFFF00 145 "ll $t3, 0($t1)" "\n\t" // $t3 = old value 146 "and $t4, $t4, $t3" "\n\t" // $t4 = $t3 and 0xFFFFFF00 147 "addu $t3, $t3, $t2" "\n\t" // $t3 = $t3 + n 148 "andi $t3, $t3, 0xFF" "\n\t" // $t3 = $t3 and 0xFF 149 "or $t3, $t3, $t4" "\n\t" // $t3 = $t3 or $t4 150 "sc $t3, 0($t1)" "\n\t" 151 "sw $t3, 8($t0)" "\n\t" // save result 152 : /*out*/ 153 : /*in*/ "r"(&block[0]) 154 : /*trash*/ "memory", "t0", "t1", "t2", "t3", "t4" 155 ); 156 } while (block[2] != 1); 157 #elif defined (_MIPSEB) 158 unsigned int block[3] 159 = { (unsigned int)p, (unsigned int)n << 24, 0x0 }; 160 do { 161 __asm__ __volatile__( 162 "move $t0, %0" "\n\t" 163 "lw $t1, 0($t0)" "\n\t" // p 164 "lw $t2, 4($t0)" "\n\t" // n 165 "ll $t3, 0($t1)" "\n\t" 166 "addu $t3, $t3, $t2" "\n\t" 167 "sc $t3, 0($t1)" "\n\t" 168 "sw $t3, 8($t0)" "\n\t" 169 : /*out*/ 170 : /*in*/ "r"(&block[0]) 171 : /*trash*/ "memory", "t0", "t1", "t2", "t3" 172 ); 173 } while (block[2] != 1); 174 #endif 175 #elif defined(VGA_mips64) 176 /* We rely on the fact that p is 4-aligned. Otherwise 'll' may throw an 177 exception that can cause this function to fail. */ 178 #if defined (_MIPSEL) 179 unsigned long block[3] 180 = { (unsigned long)p, (unsigned long)n, 0x0ULL }; 181 do { 182 __asm__ __volatile__( 183 "move $t0, %0" "\n\t" 184 "ld $t1, 0($t0)" "\n\t" // p 185 "ld $t2, 8($t0)" "\n\t" // n 186 "andi $t2, $t2, 0xFF" "\n\t" // n = n and 0xFF 187 "li $s0, 0xFF" "\n\t" 188 "nor $s0, $s0, $zero" "\n\t" // $s0 = 0xFFFFFF00 189 "ll $t3, 0($t1)" "\n\t" // $t3 = old value 190 "and $s0, $s0, $t3" "\n\t" // $s0 = $t3 and 0xFFFFFF00 191 "addu $t3, $t3, $t2" "\n\t" // $t3 = $t3 + n 192 "andi $t3, $t3, 0xFF" "\n\t" // $t3 = $t3 and 0xFF 193 "or $t3, $t3, $s0" "\n\t" // $t3 = $t3 or $s0 194 "sc $t3, 0($t1)" "\n\t" 195 "sw $t3, 16($t0)" "\n\t" // save result 196 : /*out*/ 197 : /*in*/ "r"(&block[0]) 198 : /*trash*/ "memory", "t0", "t1", "t2", "t3", "s0" 199 ); 200 } while (block[2] != 1); 201 #elif defined (_MIPSEB) 202 unsigned long block[3] 203 = { (unsigned long)p, (unsigned long)n << 56, 0x0 }; 204 do { 205 __asm__ __volatile__( 206 "move $t0, %0" "\n\t" 207 "ld $t1, 0($t0)" "\n\t" // p 208 "ld $t2, 8($t0)" "\n\t" // n 209 "lld $t3, 0($t1)" "\n\t" 210 "daddu $t3, $t3, $t2" "\n\t" 211 "scd $t3, 0($t1)" "\n\t" 212 "sd $t3, 16($t0)" "\n\t" 213 : /*out*/ 214 : /*in*/ "r"(&block[0]) 215 : /*trash*/ "memory", "t0", "t1", "t2", "t3" 216 ); 217 } while (block[2] != 1); 218 #endif 219 #else 220 # error "Unsupported arch" 221 #endif 222 } 223 224 225 __attribute__((noinline)) void atomic_add_16bit ( short* p, int n ) 226 { 227 #if defined(VGA_x86) 228 unsigned long block[2]; 229 block[0] = (unsigned long)p; 230 block[1] = n; 231 __asm__ __volatile__( 232 "movl 0(%%esi),%%eax" "\n\t" 233 "movl 4(%%esi),%%ebx" "\n\t" 234 "lock; addw %%bx,(%%eax)" "\n" 235 : : "S"(&block[0])/* S means "esi only" */ : "memory","cc","eax","ebx" 236 ); 237 #elif defined(VGA_amd64) 238 unsigned long block[2]; 239 block[0] = (unsigned long)p; 240 block[1] = n; 241 __asm__ __volatile__( 242 "movq 0(%%rsi),%%rax" "\n\t" 243 "movq 8(%%rsi),%%rbx" "\n\t" 244 "lock; addw %%bx,(%%rax)" "\n" 245 : : "S"(&block[0])/* S means "rsi only" */ : "memory","cc","rax","rbx" 246 ); 247 #elif defined(VGA_ppc32) 248 /* Nasty hack. Does correctly atomically do *p += n, but only if p 249 is 8-aligned -- guaranteed by caller. */ 250 unsigned long success; 251 do { 252 __asm__ __volatile__( 253 "lwarx 15,0,%1" "\n\t" 254 "add 15,15,%2" "\n\t" 255 "stwcx. 15,0,%1" "\n\t" 256 "mfcr %0" "\n\t" 257 "srwi %0,%0,29" "\n\t" 258 "andi. %0,%0,1" "\n" 259 : /*out*/"=b"(success) 260 : /*in*/ "b"(p), "b"(((unsigned long)n) << 16) 261 : /*trash*/ "memory", "cc", "r15" 262 ); 263 } while (success != 1); 264 #elif defined(VGA_ppc64) 265 /* Nasty hack. Does correctly atomically do *p += n, but only if p 266 is 8-aligned -- guaranteed by caller. */ 267 unsigned long success; 268 do { 269 __asm__ __volatile__( 270 "ldarx 15,0,%1" "\n\t" 271 "add 15,15,%2" "\n\t" 272 "stdcx. 15,0,%1" "\n\t" 273 "mfcr %0" "\n\t" 274 "srwi %0,%0,29" "\n\t" 275 "andi. %0,%0,1" "\n" 276 : /*out*/"=b"(success) 277 : /*in*/ "b"(p), "b"(((unsigned long)n) << 48) 278 : /*trash*/ "memory", "cc", "r15" 279 ); 280 } while (success != 1); 281 #elif defined(VGA_arm) 282 unsigned int block[3] 283 = { (unsigned int)p, (unsigned int)n, 0xFFFFFFFF }; 284 do { 285 __asm__ __volatile__( 286 "mov r5, %0" "\n\t" 287 "ldr r9, [r5, #0]" "\n\t" // p 288 "ldr r10, [r5, #4]" "\n\t" // n 289 "ldrexh r8, [r9]" "\n\t" 290 "add r8, r8, r10" "\n\t" 291 "strexh r4, r8, [r9]" "\n\t" 292 "str r4, [r5, #8]" "\n\t" 293 : /*out*/ 294 : /*in*/ "r"(&block[0]) 295 : /*trash*/ "memory", "cc", "r5", "r8", "r9", "r10", "r4" 296 ); 297 } while (block[2] != 0); 298 #elif defined(VGA_arm64) 299 unsigned long long int block[3] 300 = { (unsigned long long int)p, (unsigned long long int)n, 301 0xFFFFFFFFFFFFFFFFULL}; 302 do { 303 __asm__ __volatile__( 304 "mov x5, %0" "\n\t" 305 "ldr x9, [x5, #0]" "\n\t" // p 306 "ldr x10, [x5, #8]" "\n\t" // n 307 "ldxrh w8, [x9]" "\n\t" 308 "add x8, x8, x10" "\n\t" 309 "stxrh w4, w8, [x9]" "\n\t" 310 "str x4, [x5, #16]" "\n\t" 311 : /*out*/ 312 : /*in*/ "r"(&block[0]) 313 : /*trash*/ "memory", "cc", "x5", "x8", "x9", "x10", "x4" 314 ); 315 } while (block[2] != 0); 316 #elif defined(VGA_s390x) 317 int dummy; 318 __asm__ __volatile__( 319 " l 0,%0\n\t" 320 "0: st 0,%1\n\t" 321 " icm 1,3,%1\n\t" 322 " ar 1,%2\n\t" 323 " stcm 1,3,%1\n\t" 324 " l 1,%1\n\t" 325 " cs 0,1,%0\n\t" 326 " jl 0b\n\t" 327 : "+m" (*p), "+m" (dummy) 328 : "d" (n) 329 : "cc", "memory", "0", "1"); 330 #elif defined(VGA_mips32) 331 /* We rely on the fact that p is 4-aligned. Otherwise 'll' may throw an 332 exception that can cause this function to fail. */ 333 #if defined (_MIPSEL) 334 unsigned int block[3] 335 = { (unsigned int)p, (unsigned int)n, 0x0 }; 336 do { 337 __asm__ __volatile__( 338 "move $t0, %0" "\n\t" 339 "lw $t1, 0($t0)" "\n\t" // p 340 "lw $t2, 4($t0)" "\n\t" // n 341 "andi $t2, $t2, 0xFFFF" "\n\t" // n = n and 0xFFFF 342 "li $t4, 0xFFFF" "\n\t" 343 "nor $t4, $t4, $zero" "\n\t" // $t4 = 0xFFFF0000 344 "ll $t3, 0($t1)" "\n\t" // $t3 = old value 345 "and $t4, $t4, $t3" "\n\t" // $t4 = $t3 and 0xFFFF0000 346 "addu $t3, $t3, $t2" "\n\t" // $t3 = $t3 + n 347 "andi $t3, $t3, 0xFFFF" "\n\t" // $t3 = $t3 and 0xFFFF 348 "or $t3, $t3, $t4" "\n\t" // $t3 = $t3 or $t4 349 "sc $t3, 0($t1)" "\n\t" 350 "sw $t3, 8($t0)" "\n\t" // save result 351 : /*out*/ 352 : /*in*/ "r"(&block[0]) 353 : /*trash*/ "memory", "t0", "t1", "t2", "t3", "t4" 354 ); 355 } while (block[2] != 1); 356 #elif defined (_MIPSEB) 357 unsigned int block[3] 358 = { (unsigned int)p, (unsigned int)n << 16, 0x0 }; 359 do { 360 __asm__ __volatile__( 361 "move $t0, %0" "\n\t" 362 "lw $t1, 0($t0)" "\n\t" // p 363 "lw $t2, 4($t0)" "\n\t" // n 364 "ll $t3, 0($t1)" "\n\t" 365 "addu $t3, $t3, $t2" "\n\t" 366 "sc $t3, 0($t1)" "\n\t" 367 "sw $t3, 8($t0)" "\n\t" 368 : /*out*/ 369 : /*in*/ "r"(&block[0]) 370 : /*trash*/ "memory", "t0", "t1", "t2", "t3" 371 ); 372 } while (block[2] != 1); 373 #endif 374 #elif defined(VGA_mips64) 375 /* We rely on the fact that p is 4-aligned. Otherwise 'll' may throw an 376 exception that can cause this function to fail. */ 377 #if defined (_MIPSEL) 378 unsigned long block[3] 379 = { (unsigned long)p, (unsigned long)n, 0x0ULL }; 380 do { 381 __asm__ __volatile__( 382 "move $t0, %0" "\n\t" 383 "ld $t1, 0($t0)" "\n\t" // p 384 "ld $t2, 8($t0)" "\n\t" // n 385 "andi $t2, $t2, 0xFFFF" "\n\t" // n = n and 0xFFFF 386 "li $s0, 0xFFFF" "\n\t" 387 "nor $s0, $s0, $zero" "\n\t" // $s0= 0xFFFF0000 388 "ll $t3, 0($t1)" "\n\t" // $t3 = old value 389 "and $s0, $s0, $t3" "\n\t" // $s0 = $t3 and 0xFFFF0000 390 "addu $t3, $t3, $t2" "\n\t" // $t3 = $t3 + n 391 "andi $t3, $t3, 0xFFFF" "\n\t" // $t3 = $t3 and 0xFFFF 392 "or $t3, $t3, $s0" "\n\t" // $t3 = $t3 or $s0 393 "sc $t3, 0($t1)" "\n\t" 394 "sw $t3, 16($t0)" "\n\t" // save result 395 : /*out*/ 396 : /*in*/ "r"(&block[0]) 397 : /*trash*/ "memory", "t0", "t1", "t2", "t3", "s0" 398 ); 399 } while (block[2] != 1); 400 #elif defined (_MIPSEB) 401 unsigned long block[3] 402 = { (unsigned long)p, (unsigned long)n << 48, 0x0 }; 403 do { 404 __asm__ __volatile__( 405 "move $t0, %0" "\n\t" 406 "ld $t1, 0($t0)" "\n\t" // p 407 "ld $t2, 8($t0)" "\n\t" // n 408 "lld $t3, 0($t1)" "\n\t" 409 "daddu $t3, $t3, $t2" "\n\t" 410 "scd $t3, 0($t1)" "\n\t" 411 "sd $t3, 16($t0)" "\n\t" 412 : /*out*/ 413 : /*in*/ "r"(&block[0]) 414 : /*trash*/ "memory", "t0", "t1", "t2", "t3" 415 ); 416 } while (block[2] != 1); 417 #endif 418 #else 419 # error "Unsupported arch" 420 #endif 421 } 422 423 __attribute__((noinline)) void atomic_add_32bit ( int* p, int n ) 424 { 425 #if defined(VGA_x86) 426 unsigned long block[2]; 427 block[0] = (unsigned long)p; 428 block[1] = n; 429 __asm__ __volatile__( 430 "movl 0(%%esi),%%eax" "\n\t" 431 "movl 4(%%esi),%%ebx" "\n\t" 432 "lock; addl %%ebx,(%%eax)" "\n" 433 : : "S"(&block[0])/* S means "esi only" */ : "memory","cc","eax","ebx" 434 ); 435 #elif defined(VGA_amd64) 436 unsigned long block[2]; 437 block[0] = (unsigned long)p; 438 block[1] = n; 439 __asm__ __volatile__( 440 "movq 0(%%rsi),%%rax" "\n\t" 441 "movq 8(%%rsi),%%rbx" "\n\t" 442 "lock; addl %%ebx,(%%rax)" "\n" 443 : : "S"(&block[0])/* S means "rsi only" */ : "memory","cc","rax","rbx" 444 ); 445 #elif defined(VGA_ppc32) 446 unsigned long success; 447 do { 448 __asm__ __volatile__( 449 "lwarx 15,0,%1" "\n\t" 450 "add 15,15,%2" "\n\t" 451 "stwcx. 15,0,%1" "\n\t" 452 "mfcr %0" "\n\t" 453 "srwi %0,%0,29" "\n\t" 454 "andi. %0,%0,1" "\n" 455 : /*out*/"=b"(success) 456 : /*in*/ "b"(p), "b"(n) 457 : /*trash*/ "memory", "cc", "r15" 458 ); 459 } while (success != 1); 460 #elif defined(VGA_ppc64) 461 /* Nasty hack. Does correctly atomically do *p += n, but only if p 462 is 8-aligned -- guaranteed by caller. */ 463 unsigned long success; 464 do { 465 __asm__ __volatile__( 466 "ldarx 15,0,%1" "\n\t" 467 "add 15,15,%2" "\n\t" 468 "stdcx. 15,0,%1" "\n\t" 469 "mfcr %0" "\n\t" 470 "srwi %0,%0,29" "\n\t" 471 "andi. %0,%0,1" "\n" 472 : /*out*/"=b"(success) 473 : /*in*/ "b"(p), "b"(((unsigned long)n) << 32) 474 : /*trash*/ "memory", "cc", "r15" 475 ); 476 } while (success != 1); 477 #elif defined(VGA_arm) 478 unsigned int block[3] 479 = { (unsigned int)p, (unsigned int)n, 0xFFFFFFFF }; 480 do { 481 __asm__ __volatile__( 482 "mov r5, %0" "\n\t" 483 "ldr r9, [r5, #0]" "\n\t" // p 484 "ldr r10, [r5, #4]" "\n\t" // n 485 "ldrex r8, [r9]" "\n\t" 486 "add r8, r8, r10" "\n\t" 487 "strex r4, r8, [r9]" "\n\t" 488 "str r4, [r5, #8]" "\n\t" 489 : /*out*/ 490 : /*in*/ "r"(&block[0]) 491 : /*trash*/ "memory", "cc", "r5", "r8", "r9", "r10", "r4" 492 ); 493 } while (block[2] != 0); 494 #elif defined(VGA_arm64) 495 unsigned long long int block[3] 496 = { (unsigned long long int)p, (unsigned long long int)n, 497 0xFFFFFFFFFFFFFFFFULL}; 498 do { 499 __asm__ __volatile__( 500 "mov x5, %0" "\n\t" 501 "ldr x9, [x5, #0]" "\n\t" // p 502 "ldr x10, [x5, #8]" "\n\t" // n 503 "ldxr w8, [x9]" "\n\t" 504 "add x8, x8, x10" "\n\t" 505 "stxr w4, w8, [x9]" "\n\t" 506 "str x4, [x5, #16]" "\n\t" 507 : /*out*/ 508 : /*in*/ "r"(&block[0]) 509 : /*trash*/ "memory", "cc", "x5", "x8", "x9", "x10", "x4" 510 ); 511 } while (block[2] != 0); 512 #elif defined(VGA_s390x) 513 __asm__ __volatile__( 514 " l 0,%0\n\t" 515 "0: lr 1,0\n\t" 516 " ar 1,%1\n\t" 517 " cs 0,1,%0\n\t" 518 " jl 0b\n\t" 519 : "+m" (*p) 520 : "d" (n) 521 : "cc", "memory", "0", "1"); 522 #elif defined(VGA_mips32) 523 unsigned int block[3] 524 = { (unsigned int)p, (unsigned int)n, 0x0 }; 525 do { 526 __asm__ __volatile__( 527 "move $t0, %0" "\n\t" 528 "lw $t1, 0($t0)" "\n\t" // p 529 "lw $t2, 4($t0)" "\n\t" // n 530 "ll $t3, 0($t1)" "\n\t" 531 "addu $t3, $t3, $t2" "\n\t" 532 "sc $t3, 0($t1)" "\n\t" 533 "sw $t3, 8($t0)" "\n\t" 534 : /*out*/ 535 : /*in*/ "r"(&block[0]) 536 : /*trash*/ "memory", "t0", "t1", "t2", "t3" 537 ); 538 } while (block[2] != 1); 539 #elif defined(VGA_mips64) 540 unsigned long block[3] 541 = { (unsigned long)p, (unsigned long)n, 0x0ULL }; 542 do { 543 __asm__ __volatile__( 544 "move $t0, %0" "\n\t" 545 "ld $t1, 0($t0)" "\n\t" // p 546 "ld $t2, 8($t0)" "\n\t" // n 547 "ll $t3, 0($t1)" "\n\t" 548 "addu $t3, $t3, $t2" "\n\t" 549 "sc $t3, 0($t1)" "\n\t" 550 "sd $t3, 16($t0)" "\n\t" 551 : /*out*/ 552 : /*in*/ "r"(&block[0]) 553 : /*trash*/ "memory", "t0", "t1", "t2", "t3" 554 ); 555 } while (block[2] != 1); 556 #else 557 # error "Unsupported arch" 558 #endif 559 } 560 561 __attribute__((noinline)) void atomic_add_64bit ( long long int* p, int n ) 562 { 563 #if defined(VGA_x86) || defined(VGA_ppc32) || defined(VGA_mips32) 564 /* do nothing; is not supported */ 565 #elif defined(VGA_amd64) 566 // this is a bit subtle. It relies on the fact that, on a 64-bit platform, 567 // sizeof(unsigned long long int) == sizeof(unsigned long) == sizeof(void*) 568 unsigned long long int block[2]; 569 block[0] = (unsigned long long int)(unsigned long)p; 570 block[1] = n; 571 __asm__ __volatile__( 572 "movq 0(%%rsi),%%rax" "\n\t" 573 "movq 8(%%rsi),%%rbx" "\n\t" 574 "lock; addq %%rbx,(%%rax)" "\n" 575 : : "S"(&block[0])/* S means "rsi only" */ : "memory","cc","rax","rbx" 576 ); 577 #elif defined(VGA_ppc64) 578 unsigned long success; 579 do { 580 __asm__ __volatile__( 581 "ldarx 15,0,%1" "\n\t" 582 "add 15,15,%2" "\n\t" 583 "stdcx. 15,0,%1" "\n\t" 584 "mfcr %0" "\n\t" 585 "srwi %0,%0,29" "\n\t" 586 "andi. %0,%0,1" "\n" 587 : /*out*/"=b"(success) 588 : /*in*/ "b"(p), "b"(n) 589 : /*trash*/ "memory", "cc", "r15" 590 ); 591 } while (success != 1); 592 #elif defined(VGA_arm) 593 unsigned long long int block[3] 594 = { (unsigned long long int)(unsigned long)p, 595 (unsigned long long int)n, 596 0xFFFFFFFFFFFFFFFFULL }; 597 do { 598 __asm__ __volatile__( 599 "mov r5, %0" "\n\t" 600 "ldr r8, [r5, #0]" "\n\t" // p 601 "ldrd r2, r3, [r5, #8]" "\n\t" // n 602 "ldrexd r0, r1, [r8]" "\n\t" 603 "adds r2, r2, r0" "\n\t" 604 "adc r3, r3, r1" "\n\t" 605 "strexd r1, r2, r3, [r8]" "\n\t" 606 "str r1, [r5, #16]" "\n\t" 607 : /*out*/ 608 : /*in*/ "r"(&block[0]) 609 : /*trash*/ "memory", "cc", "r5", "r0", "r1", "r8", "r2", "r3" 610 ); 611 } while (block[2] != 0xFFFFFFFF00000000ULL); 612 #elif defined(VGA_arm64) 613 unsigned long long int block[3] 614 = { (unsigned long long int)p, (unsigned long long int)n, 615 0xFFFFFFFFFFFFFFFFULL}; 616 do { 617 __asm__ __volatile__( 618 "mov x5, %0" "\n\t" 619 "ldr x9, [x5, #0]" "\n\t" // p 620 "ldr x10, [x5, #8]" "\n\t" // n 621 "ldxr x8, [x9]" "\n\t" 622 "add x8, x8, x10" "\n\t" 623 "stxr w4, x8, [x9]" "\n\t" 624 "str x4, [x5, #16]" "\n\t" 625 : /*out*/ 626 : /*in*/ "r"(&block[0]) 627 : /*trash*/ "memory", "cc", "x5", "x8", "x9", "x10", "x4" 628 ); 629 } while (block[2] != 0); 630 #elif defined(VGA_s390x) 631 __asm__ __volatile__( 632 " lg 0,%0\n\t" 633 "0: lgr 1,0\n\t" 634 " agr 1,%1\n\t" 635 " csg 0,1,%0\n\t" 636 " jl 0b\n\t" 637 : "+m" (*p) 638 : "d" (n) 639 : "cc", "memory", "0", "1"); 640 #elif defined(VGA_mips64) 641 unsigned long block[3] 642 = { (unsigned long)p, (unsigned long)n, 0x0ULL }; 643 do { 644 __asm__ __volatile__( 645 "move $t0, %0" "\n\t" 646 "ld $t1, 0($t0)" "\n\t" // p 647 "ld $t2, 8($t0)" "\n\t" // n 648 "lld $t3, 0($t1)" "\n\t" 649 "daddu $t3, $t3, $t2" "\n\t" 650 "scd $t3, 0($t1)" "\n\t" 651 "sd $t3, 16($t0)" "\n\t" 652 : /*out*/ 653 : /*in*/ "r"(&block[0]) 654 : /*trash*/ "memory", "t0", "t1", "t2", "t3" 655 ); 656 } while (block[2] != 1); 657 #else 658 # error "Unsupported arch" 659 #endif 660 } 661 662 int main ( int argc, char** argv ) 663 { 664 int i, status; 665 char* page; 666 char* p8; 667 short* p16; 668 int* p32; 669 long long int* p64; 670 pid_t child, p2; 671 672 printf("parent, pre-fork\n"); 673 674 page = mmap( 0, sysconf(_SC_PAGESIZE), 675 PROT_READ|PROT_WRITE, 676 MAP_ANONYMOUS|MAP_SHARED, -1, 0 ); 677 if (page == MAP_FAILED) { 678 perror("mmap failed"); 679 exit(1); 680 } 681 682 p8 = (char*)(page+0); 683 p16 = (short*)(page+256); 684 p32 = (int*)(page+512); 685 p64 = (long long int*)(page+768); 686 687 assert( IS_8_ALIGNED(p8) ); 688 assert( IS_8_ALIGNED(p16) ); 689 assert( IS_8_ALIGNED(p32) ); 690 assert( IS_8_ALIGNED(p64) ); 691 692 memset(page, 0, 1024); 693 694 *p8 = 0; 695 *p16 = 0; 696 *p32 = 0; 697 *p64 = 0; 698 699 child = fork(); 700 if (child == -1) { 701 perror("fork() failed\n"); 702 return 1; 703 } 704 705 if (child == 0) { 706 /* --- CHILD --- */ 707 printf("child\n"); 708 for (i = 0; i < NNN; i++) { 709 atomic_add_8bit(p8, 1); 710 atomic_add_16bit(p16, 1); 711 atomic_add_32bit(p32, 1); 712 atomic_add_64bit(p64, 98765 ); /* ensure we hit the upper 32 bits */ 713 } 714 return 1; 715 /* NOTREACHED */ 716 717 } 718 719 /* --- PARENT --- */ 720 721 printf("parent\n"); 722 723 for (i = 0; i < NNN; i++) { 724 atomic_add_8bit(p8, 1); 725 atomic_add_16bit(p16, 1); 726 atomic_add_32bit(p32, 1); 727 atomic_add_64bit(p64, 98765 ); /* ensure we hit the upper 32 bits */ 728 } 729 730 p2 = waitpid(child, &status, 0); 731 assert(p2 == child); 732 733 /* assert that child finished normally */ 734 assert(WIFEXITED(status)); 735 736 printf("FINAL VALUES: 8 bit %d, 16 bit %d, 32 bit %d, 64 bit %lld\n", 737 (int)(*(signed char*)p8), (int)(*p16), *p32, *p64 ); 738 739 if (-74 == (int)(*(signed char*)p8) 740 && 32694 == (int)(*p16) 741 && 6913974 == *p32 742 && (0LL == *p64 || 682858642110LL == *p64)) { 743 printf("PASS\n"); 744 } else { 745 printf("FAIL -- see source code for expected values\n"); 746 } 747 748 printf("parent exits\n"); 749 750 return 0; 751 } 752