/* This is an example of a program which does atomic memory operations
   between two processes which share a page.  Valgrind 3.4.1 and
   earlier produce incorrect answers because it does not preserve
   atomicity of the relevant instructions in the generated code; but
   the post-DCAS-merge versions of Valgrind do behave correctly. */

/* On ARM, this can be compiled into either ARM or Thumb code, so as
   to test both A and T encodings of LDREX/STREX et al.  Also on ARM,
   it tests doubleword atomics (LDREXD, STREXD) which I don't think it
   does on any other platform. */

#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <assert.h>
#include <unistd.h>
#include <sys/wait.h>
#include "tests/sys_mman.h"

/* Number of atomic increments each of the two processes performs on
   every shared counter. */
#define NNN 3456987

#define IS_8_ALIGNED(_ptr)   (0 == (((unsigned long)(_ptr)) & 7))


/* Atomically do *p += n on an 8-bit location, using whatever atomic
   primitive the target architecture provides.  Several of the LL/SC
   based versions operate on the whole aligned word/doubleword
   containing *p (shifting n into the right byte lane), so they rely
   on the alignment guarantees provided by main().  noinline keeps
   each sequence as a self-contained unit under Valgrind. */
__attribute__((noinline)) void atomic_add_8bit ( char* p, int n )
{
#if defined(VGA_x86)
   /* Load p and n from a block whose address is pinned in %esi, then
      do a single locked add of the low byte of n. */
   unsigned long block[2];
   block[0] = (unsigned long)p;
   block[1] = n;
   __asm__ __volatile__(
      "movl 0(%%esi),%%eax"      "\n\t"
      "movl 4(%%esi),%%ebx"      "\n\t"
      "lock; addb %%bl,(%%eax)"  "\n"
      : : "S"(&block[0])/* S means "esi only" */ : "memory","cc","eax","ebx"
   );
#elif defined(VGA_amd64)
   unsigned long block[2];
   block[0] = (unsigned long)p;
   block[1] = n;
   __asm__ __volatile__(
      "movq 0(%%rsi),%%rax"      "\n\t"
      "movq 8(%%rsi),%%rbx"      "\n\t"
      "lock; addb %%bl,(%%rax)"  "\n"
      : : "S"(&block[0])/* S means "rsi only" */ : "memory","cc","rax","rbx"
   );
#elif defined(VGA_ppc32)
   /* Nasty hack.  Does correctly atomically do *p += n, but only if p
      is 4-aligned -- guaranteed by caller.  n is shifted into the
      most significant byte of the word (big-endian: the byte at p),
      and the whole word is updated with lwarx/stwcx..  The mfcr/srwi/
      andi. sequence extracts the CR0 "store succeeded" bit into
      'success'. */
   unsigned long success;
   do {
      __asm__ __volatile__(
         "lwarx  15,0,%1"    "\n\t"
         "add    15,15,%2"   "\n\t"
         "stwcx. 15,0,%1"    "\n\t"
         "mfcr   %0"         "\n\t"
         "srwi   %0,%0,29"   "\n\t"
         "andi.  %0,%0,1"    "\n"
         : /*out*/"=b"(success)
         : /*in*/ "b"(p), "b"(((unsigned long)n) << 24)
         : /*trash*/ "memory", "cc", "r15"
      );
   } while (success != 1);
#elif defined(VGA_ppc64be)
   /* Nasty hack.  Does correctly atomically do *p += n, but only if p
      is 8-aligned -- guaranteed by caller.  As for ppc32, but using
      a doubleword ldarx/stdcx. with n shifted into the top byte. */
   unsigned long success;
   do {
      __asm__ __volatile__(
         "ldarx  15,0,%1"    "\n\t"
         "add    15,15,%2"   "\n\t"
         "stdcx. 15,0,%1"    "\n\t"
         "mfcr   %0"         "\n\t"
         "srwi   %0,%0,29"   "\n\t"
         "andi.  %0,%0,1"    "\n"
         : /*out*/"=b"(success)
         : /*in*/ "b"(p), "b"(((unsigned long)n) << 56)
         : /*trash*/ "memory", "cc", "r15"
      );
   } while (success != 1);
#elif defined(VGA_ppc64le)
   /* Nasty hack.  Does correctly atomically do *p += n, but only if p
      is 8-aligned -- guaranteed by caller.  Little-endian: the byte
      at p is the least significant byte of the doubleword, so no
      shift of n is needed. */
   unsigned long success;
   do {
      __asm__ __volatile__(
         "ldarx  15,0,%1"    "\n\t"
         "add    15,15,%2"   "\n\t"
         "stdcx. 15,0,%1"    "\n\t"
         "mfcr   %0"         "\n\t"
         "srwi   %0,%0,29"   "\n\t"
         "andi.  %0,%0,1"    "\n"
         : /*out*/"=b"(success)
         : /*in*/ "b"(p), "b"(((unsigned long)n))
         : /*trash*/ "memory", "cc", "r15"
      );
   } while (success != 1);
#elif defined(VGA_arm)
   /* block = { p, n, strex-status }; block[2] is 0 on a successful
      store-exclusive, nonzero otherwise, so loop until it is 0. */
   unsigned int block[3]
      = { (unsigned int)p, (unsigned int)n, 0xFFFFFFFF };
   do {
      __asm__ __volatile__(
         "mov    r5, %0"         "\n\t"
         "ldr    r9, [r5, #0]"   "\n\t" // p
         "ldr    r10, [r5, #4]"  "\n\t" // n
         "ldrexb r8, [r9]"       "\n\t"
         "add    r8, r8, r10"    "\n\t"
         "strexb r4, r8, [r9]"   "\n\t"
         "str    r4, [r5, #8]"   "\n\t"
         : /*out*/
         : /*in*/ "r"(&block[0])
         : /*trash*/ "memory", "cc", "r5", "r8", "r9", "r10", "r4"
      );
   } while (block[2] != 0);
#elif defined(VGA_arm64)
   /* Same scheme as the 32-bit ARM version, using ldxrb/stxrb. */
   unsigned long long int block[3]
      = { (unsigned long long int)p, (unsigned long long int)n,
          0xFFFFFFFFFFFFFFFFULL};
   do {
      __asm__ __volatile__(
         "mov   x5, %0"         "\n\t"
         "ldr   x9, [x5, #0]"   "\n\t" // p
         "ldr   x10, [x5, #8]"  "\n\t" // n
         "ldxrb w8, [x9]"       "\n\t"
         "add   x8, x8, x10"    "\n\t"
         "stxrb w4, w8, [x9]"   "\n\t"
         "str   x4, [x5, #16]"  "\n\t"
         : /*out*/
         : /*in*/ "r"(&block[0])
         : /*trash*/ "memory", "cc", "x5", "x8", "x9", "x10", "x4"
      );
   } while (block[2] != 0);
#elif defined(VGA_s390x)
   /* Compare-and-swap loop: build the updated word (with only the
      byte selected by icm/stcm mask 1 modified) in 'dummy', then CS
      it into *p, retrying on contention (jl = jump if CS failed). */
   int dummy;
   __asm__ __volatile__(
      "   l	0,%0\n\t"
      "0: st	0,%1\n\t"
      "   icm	1,1,%1\n\t"
      "   ar	1,%2\n\t"
      "   stcm	1,1,%1\n\t"
      "   l	1,%1\n\t"
      "   cs	0,1,%0\n\t"
      "   jl	0b\n\t"
      : "+m" (*p), "+m" (dummy)
      : "d" (n)
      : "cc", "memory", "0", "1");
#elif defined(VGA_mips32)
   /* We rely on the fact that p is 4-aligned. Otherwise 'll' may throw an
      exception that can cause this function to fail. */
#if defined (_MIPSEL)
   /* Little-endian: merge (old & ~0xFF) with ((old + n) & 0xFF) so
      only the low byte of the word changes.  block[2] receives the
      sc result: 1 = store succeeded. */
   unsigned int block[3]
      = { (unsigned int)p, (unsigned int)n, 0x0 };
   do {
      __asm__ __volatile__(
         "move $t0, %0"           "\n\t"
         "lw   $t1, 0($t0)"       "\n\t" // p
         "lw   $t2, 4($t0)"       "\n\t" // n
         "andi $t2, $t2, 0xFF"    "\n\t" // n = n and 0xFF
         "li   $t4, 0xFF"         "\n\t"
         "nor  $t4, $t4, $zero"   "\n\t" // $t4 = 0xFFFFFF00
         "ll   $t3, 0($t1)"       "\n\t" // $t3 = old value
         "and  $t4, $t4, $t3"     "\n\t" // $t4 = $t3 and 0xFFFFFF00
         "addu $t3, $t3, $t2"     "\n\t" // $t3 = $t3 + n
         "andi $t3, $t3, 0xFF"    "\n\t" // $t3 = $t3 and 0xFF
         "or   $t3, $t3, $t4"     "\n\t" // $t3 = $t3 or $t4
         "sc   $t3, 0($t1)"       "\n\t"
         "sw   $t3, 8($t0)"       "\n\t" // save result
         : /*out*/
         : /*in*/ "r"(&block[0])
         : /*trash*/ "memory", "t0", "t1", "t2", "t3", "t4"
      );
   } while (block[2] != 1);
#elif defined (_MIPSEB)
   /* Big-endian: the byte at p is the top byte of the word, so simply
      add n << 24; carries out of that byte fall off the top. */
   unsigned int block[3]
      = { (unsigned int)p, (unsigned int)n << 24, 0x0 };
   do {
      __asm__ __volatile__(
         "move $t0, %0"        "\n\t"
         "lw   $t1, 0($t0)"    "\n\t" // p
         "lw   $t2, 4($t0)"    "\n\t" // n
         "ll   $t3, 0($t1)"    "\n\t"
         "addu $t3, $t3, $t2"  "\n\t"
         "sc   $t3, 0($t1)"    "\n\t"
         "sw   $t3, 8($t0)"    "\n\t"
         : /*out*/
         : /*in*/ "r"(&block[0])
         : /*trash*/ "memory", "t0", "t1", "t2", "t3"
      );
   } while (block[2] != 1);
#endif
#elif defined(VGA_mips64)
   /* We rely on the fact that p is 4-aligned. Otherwise 'll' may throw an
      exception that can cause this function to fail. */
#if defined (_MIPSEL)
   /* As mips32 little-endian, but with 64-bit pointers in the block. */
   unsigned long block[3]
      = { (unsigned long)p, (unsigned long)n, 0x0ULL };
   do {
      __asm__ __volatile__(
         "move $t0, %0"           "\n\t"
         "ld   $t1, 0($t0)"       "\n\t" // p
         "ld   $t2, 8($t0)"       "\n\t" // n
         "andi $t2, $t2, 0xFF"    "\n\t" // n = n and 0xFF
         "li   $s0, 0xFF"         "\n\t"
         "nor  $s0, $s0, $zero"   "\n\t" // $s0 = 0xFFFFFF00
         "ll   $t3, 0($t1)"       "\n\t" // $t3 = old value
         "and  $s0, $s0, $t3"     "\n\t" // $s0 = $t3 and 0xFFFFFF00
         "addu $t3, $t3, $t2"     "\n\t" // $t3 = $t3 + n
         "andi $t3, $t3, 0xFF"    "\n\t" // $t3 = $t3 and 0xFF
         "or   $t3, $t3, $s0"     "\n\t" // $t3 = $t3 or $s0
         "sc   $t3, 0($t1)"       "\n\t"
         "sw   $t3, 16($t0)"      "\n\t" // save result
         : /*out*/
         : /*in*/ "r"(&block[0])
         : /*trash*/ "memory", "t0", "t1", "t2", "t3", "s0"
      );
   } while (block[2] != 1);
#elif defined (_MIPSEB)
   /* Big-endian doubleword: the byte at p is the top byte, so add
      n << 56 using the 64-bit lld/scd pair. */
   unsigned long block[3]
      = { (unsigned long)p, (unsigned long)n << 56, 0x0 };
   do {
      __asm__ __volatile__(
         "move  $t0, %0"        "\n\t"
         "ld    $t1, 0($t0)"    "\n\t" // p
         "ld    $t2, 8($t0)"    "\n\t" // n
         "lld   $t3, 0($t1)"    "\n\t"
         "daddu $t3, $t3, $t2"  "\n\t"
         "scd   $t3, 0($t1)"    "\n\t"
         "sd    $t3, 16($t0)"   "\n\t"
         : /*out*/
         : /*in*/ "r"(&block[0])
         : /*trash*/ "memory", "t0", "t1", "t2", "t3"
      );
   } while (block[2] != 1);
#endif
#else
# error "Unsupported arch"
#endif
}


/* Atomically do *p += n on a 16-bit location.  Structure mirrors
   atomic_add_8bit; see the comments there for the per-arch schemes.
   The word/doubleword-based versions rely on the caller's alignment
   guarantee. */
__attribute__((noinline)) void atomic_add_16bit ( short* p, int n )
{
#if defined(VGA_x86)
   unsigned long block[2];
   block[0] = (unsigned long)p;
   block[1] = n;
   __asm__ __volatile__(
      "movl 0(%%esi),%%eax"      "\n\t"
      "movl 4(%%esi),%%ebx"      "\n\t"
      "lock; addw %%bx,(%%eax)"  "\n"
      : : "S"(&block[0])/* S means "esi only" */ : "memory","cc","eax","ebx"
   );
#elif defined(VGA_amd64)
   unsigned long block[2];
   block[0] = (unsigned long)p;
   block[1] = n;
   __asm__ __volatile__(
      "movq 0(%%rsi),%%rax"      "\n\t"
      "movq 8(%%rsi),%%rbx"      "\n\t"
      "lock; addw %%bx,(%%rax)"  "\n"
      : : "S"(&block[0])/* S means "rsi only" */ : "memory","cc","rax","rbx"
   );
#elif defined(VGA_ppc32)
   /* Nasty hack.  Does correctly atomically do *p += n, but only if p
      is 8-aligned -- guaranteed by caller.  n goes in the upper
      halfword of the lwarx/stwcx.'d word (big-endian). */
   unsigned long success;
   do {
      __asm__ __volatile__(
         "lwarx  15,0,%1"    "\n\t"
         "add    15,15,%2"   "\n\t"
         "stwcx. 15,0,%1"    "\n\t"
         "mfcr   %0"         "\n\t"
         "srwi   %0,%0,29"   "\n\t"
         "andi.  %0,%0,1"    "\n"
         : /*out*/"=b"(success)
         : /*in*/ "b"(p), "b"(((unsigned long)n) << 16)
         : /*trash*/ "memory", "cc", "r15"
      );
   } while (success != 1);
#elif defined(VGA_ppc64be)
   /* Nasty hack.  Does correctly atomically do *p += n, but only if p
      is 8-aligned -- guaranteed by caller. */
   unsigned long success;
   do {
      __asm__ __volatile__(
         "ldarx  15,0,%1"    "\n\t"
         "add    15,15,%2"   "\n\t"
         "stdcx. 15,0,%1"    "\n\t"
         "mfcr   %0"         "\n\t"
         "srwi   %0,%0,29"   "\n\t"
         "andi.  %0,%0,1"    "\n"
         : /*out*/"=b"(success)
         : /*in*/ "b"(p), "b"(((unsigned long)n) << 48)
         : /*trash*/ "memory", "cc", "r15"
      );
   } while (success != 1);
#elif defined(VGA_ppc64le)
   /* Nasty hack.  Does correctly atomically do *p += n, but only if p
      is 8-aligned -- guaranteed by caller.  Little-endian: no shift
      needed. */
   unsigned long success;
   do {
      __asm__ __volatile__(
         "ldarx  15,0,%1"    "\n\t"
         "add    15,15,%2"   "\n\t"
         "stdcx. 15,0,%1"    "\n\t"
         "mfcr   %0"         "\n\t"
         "srwi   %0,%0,29"   "\n\t"
         "andi.  %0,%0,1"    "\n"
         : /*out*/"=b"(success)
         : /*in*/ "b"(p), "b"(((unsigned long)n))
         : /*trash*/ "memory", "cc", "r15"
      );
   } while (success != 1);
#elif defined(VGA_arm)
   unsigned int block[3]
      = { (unsigned int)p, (unsigned int)n, 0xFFFFFFFF };
   do {
      __asm__ __volatile__(
         "mov    r5, %0"         "\n\t"
         "ldr    r9, [r5, #0]"   "\n\t" // p
         "ldr    r10, [r5, #4]"  "\n\t" // n
         "ldrexh r8, [r9]"       "\n\t"
         "add    r8, r8, r10"    "\n\t"
         "strexh r4, r8, [r9]"   "\n\t"
         "str    r4, [r5, #8]"   "\n\t"
         : /*out*/
         : /*in*/ "r"(&block[0])
         : /*trash*/ "memory", "cc", "r5", "r8", "r9", "r10", "r4"
      );
   } while (block[2] != 0);
#elif defined(VGA_arm64)
   unsigned long long int block[3]
      = { (unsigned long long int)p, (unsigned long long int)n,
          0xFFFFFFFFFFFFFFFFULL};
   do {
      __asm__ __volatile__(
         "mov   x5, %0"         "\n\t"
         "ldr   x9, [x5, #0]"   "\n\t" // p
         "ldr   x10, [x5, #8]"  "\n\t" // n
         "ldxrh w8, [x9]"       "\n\t"
         "add   x8, x8, x10"    "\n\t"
         "stxrh w4, w8, [x9]"   "\n\t"
         "str   x4, [x5, #16]"  "\n\t"
         : /*out*/
         : /*in*/ "r"(&block[0])
         : /*trash*/ "memory", "cc", "x5", "x8", "x9", "x10", "x4"
      );
   } while (block[2] != 0);
#elif defined(VGA_s390x)
   /* As the 8-bit version, but icm/stcm mask 3 selects a halfword. */
   int dummy;
   __asm__ __volatile__(
      "   l	0,%0\n\t"
      "0: st	0,%1\n\t"
      "   icm	1,3,%1\n\t"
      "   ar	1,%2\n\t"
      "   stcm	1,3,%1\n\t"
      "   l	1,%1\n\t"
      "   cs	0,1,%0\n\t"
      "   jl	0b\n\t"
      : "+m" (*p), "+m" (dummy)
      : "d" (n)
      : "cc", "memory", "0", "1");
#elif defined(VGA_mips32)
   /* We rely on the fact that p is 4-aligned. Otherwise 'll' may throw an
      exception that can cause this function to fail. */
#if defined (_MIPSEL)
   /* Masked merge as in atomic_add_8bit, with a 16-bit mask. */
   unsigned int block[3]
      = { (unsigned int)p, (unsigned int)n, 0x0 };
   do {
      __asm__ __volatile__(
         "move $t0, %0"           "\n\t"
         "lw   $t1, 0($t0)"       "\n\t" // p
         "lw   $t2, 4($t0)"       "\n\t" // n
         "andi $t2, $t2, 0xFFFF"  "\n\t" // n = n and 0xFFFF
         "li   $t4, 0xFFFF"       "\n\t"
         "nor  $t4, $t4, $zero"   "\n\t" // $t4 = 0xFFFF0000
         "ll   $t3, 0($t1)"       "\n\t" // $t3 = old value
         "and  $t4, $t4, $t3"     "\n\t" // $t4 = $t3 and 0xFFFF0000
         "addu $t3, $t3, $t2"     "\n\t" // $t3 = $t3 + n
         "andi $t3, $t3, 0xFFFF"  "\n\t" // $t3 = $t3 and 0xFFFF
         "or   $t3, $t3, $t4"     "\n\t" // $t3 = $t3 or $t4
         "sc   $t3, 0($t1)"       "\n\t"
         "sw   $t3, 8($t0)"       "\n\t" // save result
         : /*out*/
         : /*in*/ "r"(&block[0])
         : /*trash*/ "memory", "t0", "t1", "t2", "t3", "t4"
      );
   } while (block[2] != 1);
#elif defined (_MIPSEB)
   unsigned int block[3]
      = { (unsigned int)p, (unsigned int)n << 16, 0x0 };
   do {
      __asm__ __volatile__(
         "move $t0, %0"        "\n\t"
         "lw   $t1, 0($t0)"    "\n\t" // p
         "lw   $t2, 4($t0)"    "\n\t" // n
         "ll   $t3, 0($t1)"    "\n\t"
         "addu $t3, $t3, $t2"  "\n\t"
         "sc   $t3, 0($t1)"    "\n\t"
         "sw   $t3, 8($t0)"    "\n\t"
         : /*out*/
         : /*in*/ "r"(&block[0])
         : /*trash*/ "memory", "t0", "t1", "t2", "t3"
      );
   } while (block[2] != 1);
#endif
#elif defined(VGA_mips64)
   /* We rely on the fact that p is 4-aligned. Otherwise 'll' may throw an
      exception that can cause this function to fail. */
#if defined (_MIPSEL)
   unsigned long block[3]
      = { (unsigned long)p, (unsigned long)n, 0x0ULL };
   do {
      __asm__ __volatile__(
         "move $t0, %0"           "\n\t"
         "ld   $t1, 0($t0)"       "\n\t" // p
         "ld   $t2, 8($t0)"       "\n\t" // n
         "andi $t2, $t2, 0xFFFF"  "\n\t" // n = n and 0xFFFF
         "li   $s0, 0xFFFF"       "\n\t"
         "nor  $s0, $s0, $zero"   "\n\t" // $s0= 0xFFFF0000
         "ll   $t3, 0($t1)"       "\n\t" // $t3 = old value
         "and  $s0, $s0, $t3"     "\n\t" // $s0 = $t3 and 0xFFFF0000
         "addu $t3, $t3, $t2"     "\n\t" // $t3 = $t3 + n
         "andi $t3, $t3, 0xFFFF"  "\n\t" // $t3 = $t3 and 0xFFFF
         "or   $t3, $t3, $s0"     "\n\t" // $t3 = $t3 or $s0
         "sc   $t3, 0($t1)"       "\n\t"
         "sw   $t3, 16($t0)"      "\n\t" // save result
         : /*out*/
         : /*in*/ "r"(&block[0])
         : /*trash*/ "memory", "t0", "t1", "t2", "t3", "s0"
      );
   } while (block[2] != 1);
#elif defined (_MIPSEB)
   unsigned long block[3]
      = { (unsigned long)p, (unsigned long)n << 48, 0x0 };
   do {
      __asm__ __volatile__(
         "move  $t0, %0"        "\n\t"
         "ld    $t1, 0($t0)"    "\n\t" // p
         "ld    $t2, 8($t0)"    "\n\t" // n
         "lld   $t3, 0($t1)"    "\n\t"
         "daddu $t3, $t3, $t2"  "\n\t"
         "scd   $t3, 0($t1)"    "\n\t"
         "sd    $t3, 16($t0)"   "\n\t"
         : /*out*/
         : /*in*/ "r"(&block[0])
         : /*trash*/ "memory", "t0", "t1", "t2", "t3"
      );
   } while (block[2] != 1);
#endif
#else
# error "Unsupported arch"
#endif
}

/* Atomically do *p += n on a 32-bit location.  No byte-lane games are
   needed on 32-bit LL/SC targets since the access is word-sized. */
__attribute__((noinline)) void atomic_add_32bit ( int* p, int n )
{
#if defined(VGA_x86)
   unsigned long block[2];
   block[0] = (unsigned long)p;
   block[1] = n;
   __asm__ __volatile__(
      "movl 0(%%esi),%%eax"       "\n\t"
      "movl 4(%%esi),%%ebx"       "\n\t"
      "lock; addl %%ebx,(%%eax)"  "\n"
      : : "S"(&block[0])/* S means "esi only" */ : "memory","cc","eax","ebx"
   );
#elif defined(VGA_amd64)
   unsigned long block[2];
   block[0] = (unsigned long)p;
   block[1] = n;
   __asm__ __volatile__(
      "movq 0(%%rsi),%%rax"       "\n\t"
      "movq 8(%%rsi),%%rbx"       "\n\t"
      "lock; addl %%ebx,(%%rax)"  "\n"
      : : "S"(&block[0])/* S means "rsi only" */ : "memory","cc","rax","rbx"
   );
#elif defined(VGA_ppc32)
   unsigned long success;
   do {
      __asm__ __volatile__(
         "lwarx  15,0,%1"    "\n\t"
         "add    15,15,%2"   "\n\t"
         "stwcx. 15,0,%1"    "\n\t"
         "mfcr   %0"         "\n\t"
         "srwi   %0,%0,29"   "\n\t"
         "andi.  %0,%0,1"    "\n"
         : /*out*/"=b"(success)
         : /*in*/ "b"(p), "b"(n)
         : /*trash*/ "memory", "cc", "r15"
      );
   } while (success != 1);
#elif defined(VGA_ppc64be)
   /* Nasty hack.  Does correctly atomically do *p += n, but only if p
      is 8-aligned -- guaranteed by caller.  n goes in the upper word
      of the ldarx/stdcx.'d doubleword (big-endian). */
   unsigned long success;
   do {
      __asm__ __volatile__(
         "ldarx  15,0,%1"    "\n\t"
         "add    15,15,%2"   "\n\t"
         "stdcx. 15,0,%1"    "\n\t"
         "mfcr   %0"         "\n\t"
         "srwi   %0,%0,29"   "\n\t"
         "andi.  %0,%0,1"    "\n"
         : /*out*/"=b"(success)
         : /*in*/ "b"(p), "b"(((unsigned long)n) << 32)
         : /*trash*/ "memory", "cc", "r15"
      );
   } while (success != 1);
#elif defined(VGA_ppc64le)
   /* Nasty hack.  Does correctly atomically do *p += n, but only if p
      is 8-aligned -- guaranteed by caller. */
   unsigned long success;
   do {
      __asm__ __volatile__(
         "ldarx  15,0,%1"    "\n\t"
         "add    15,15,%2"   "\n\t"
         "stdcx. 15,0,%1"    "\n\t"
         "mfcr   %0"         "\n\t"
         "srwi   %0,%0,29"   "\n\t"
         "andi.  %0,%0,1"    "\n"
         : /*out*/"=b"(success)
         : /*in*/ "b"(p), "b"(((unsigned long)n))
         : /*trash*/ "memory", "cc", "r15"
      );
   } while (success != 1);
#elif defined(VGA_arm)
   unsigned int block[3]
      = { (unsigned int)p, (unsigned int)n, 0xFFFFFFFF };
   do {
      __asm__ __volatile__(
         "mov   r5, %0"         "\n\t"
         "ldr   r9, [r5, #0]"   "\n\t" // p
         "ldr   r10, [r5, #4]"  "\n\t" // n
         "ldrex r8, [r9]"       "\n\t"
         "add   r8, r8, r10"    "\n\t"
         "strex r4, r8, [r9]"   "\n\t"
         "str   r4, [r5, #8]"   "\n\t"
         : /*out*/
         : /*in*/ "r"(&block[0])
         : /*trash*/ "memory", "cc", "r5", "r8", "r9", "r10", "r4"
      );
   } while (block[2] != 0);
#elif defined(VGA_arm64)
   unsigned long long int block[3]
      = { (unsigned long long int)p, (unsigned long long int)n,
          0xFFFFFFFFFFFFFFFFULL};
   do {
      __asm__ __volatile__(
         "mov  x5, %0"         "\n\t"
         "ldr  x9, [x5, #0]"   "\n\t" // p
         "ldr  x10, [x5, #8]"  "\n\t" // n
         "ldxr w8, [x9]"       "\n\t"
         "add  x8, x8, x10"    "\n\t"
         "stxr w4, w8, [x9]"   "\n\t"
         "str  x4, [x5, #16]"  "\n\t"
         : /*out*/
         : /*in*/ "r"(&block[0])
         : /*trash*/ "memory", "cc", "x5", "x8", "x9", "x10", "x4"
      );
   } while (block[2] != 0);
#elif defined(VGA_s390x)
   /* Plain 32-bit compare-and-swap loop: new = old + n; CS retries
      (jl) until no other writer intervened. */
   __asm__ __volatile__(
      "   l	0,%0\n\t"
      "0: lr	1,0\n\t"
      "   ar	1,%1\n\t"
      "   cs	0,1,%0\n\t"
      "   jl	0b\n\t"
      : "+m" (*p)
      : "d" (n)
      : "cc", "memory", "0", "1");
#elif defined(VGA_mips32)
   unsigned int block[3]
      = { (unsigned int)p, (unsigned int)n, 0x0 };
   do {
      __asm__ __volatile__(
         "move $t0, %0"        "\n\t"
         "lw   $t1, 0($t0)"    "\n\t" // p
         "lw   $t2, 4($t0)"    "\n\t" // n
         "ll   $t3, 0($t1)"    "\n\t"
         "addu $t3, $t3, $t2"  "\n\t"
         "sc   $t3, 0($t1)"    "\n\t"
         "sw   $t3, 8($t0)"    "\n\t"
         : /*out*/
         : /*in*/ "r"(&block[0])
         : /*trash*/ "memory", "t0", "t1", "t2", "t3"
      );
   } while (block[2] != 1);
#elif defined(VGA_mips64)
   unsigned long block[3]
      = { (unsigned long)p, (unsigned long)n, 0x0ULL };
   do {
      __asm__ __volatile__(
         "move $t0, %0"        "\n\t"
         "ld   $t1, 0($t0)"    "\n\t" // p
         "ld   $t2, 8($t0)"    "\n\t" // n
         "ll   $t3, 0($t1)"    "\n\t"
         "addu $t3, $t3, $t2"  "\n\t"
         "sc   $t3, 0($t1)"    "\n\t"
         "sd   $t3, 16($t0)"   "\n\t"
         : /*out*/
         : /*in*/ "r"(&block[0])
         : /*trash*/ "memory", "t0", "t1", "t2", "t3"
      );
   } while (block[2] != 1);
#else
# error "Unsupported arch"
#endif
}

/* Atomically do *p += n on a 64-bit location.  A deliberate no-op on
   32-bit targets that lack a doubleword atomic (x86, ppc32, mips32);
   main() accepts *p64 == 0 for that reason. */
__attribute__((noinline)) void atomic_add_64bit ( long long int* p, int n )
{
#if defined(VGA_x86) || defined(VGA_ppc32) || defined(VGA_mips32)
   /* do nothing; is not supported */
#elif defined(VGA_amd64)
   // this is a bit subtle. It relies on the fact that, on a 64-bit platform,
   // sizeof(unsigned long long int) == sizeof(unsigned long) == sizeof(void*)
   unsigned long long int block[2];
   block[0] = (unsigned long long int)(unsigned long)p;
   block[1] = n;
   __asm__ __volatile__(
      "movq 0(%%rsi),%%rax"       "\n\t"
      "movq 8(%%rsi),%%rbx"       "\n\t"
      "lock; addq %%rbx,(%%rax)"  "\n"
      : : "S"(&block[0])/* S means "rsi only" */ : "memory","cc","rax","rbx"
   );
#elif defined(VGA_ppc64be) || defined(VGA_ppc64le)
   unsigned long success;
   do {
      __asm__ __volatile__(
         "ldarx  15,0,%1"    "\n\t"
         "add    15,15,%2"   "\n\t"
         "stdcx. 15,0,%1"    "\n\t"
         "mfcr   %0"         "\n\t"
         "srwi   %0,%0,29"   "\n\t"
         "andi.  %0,%0,1"    "\n"
         : /*out*/"=b"(success)
         : /*in*/ "b"(p), "b"(n)
         : /*trash*/ "memory", "cc", "r15"
      );
   } while (success != 1);
#elif defined(VGA_arm)
   /* Doubleword exclusive pair (LDREXD/STREXD).  Only the low 32 bits
      of block[2] are overwritten by the final str, so on success the
      little-endian doubleword reads 0xFFFFFFFF00000000. */
   unsigned long long int block[3]
     = { (unsigned long long int)(unsigned long)p,
         (unsigned long long int)n,
         0xFFFFFFFFFFFFFFFFULL };
   do {
      __asm__ __volatile__(
         "mov    r5, %0"             "\n\t"
         "ldr    r8, [r5, #0]"       "\n\t" // p
         "ldrd   r2, r3, [r5, #8]"   "\n\t" // n
         "ldrexd r0, r1, [r8]"       "\n\t"
         "adds   r2, r2, r0"         "\n\t"
         "adc    r3, r3, r1"         "\n\t"
         "strexd r1, r2, r3, [r8]"   "\n\t"
         "str    r1, [r5, #16]"      "\n\t"
         : /*out*/
         : /*in*/ "r"(&block[0])
         : /*trash*/ "memory", "cc", "r5", "r0", "r1", "r8", "r2", "r3"
      );
   } while (block[2] != 0xFFFFFFFF00000000ULL);
#elif defined(VGA_arm64)
   unsigned long long int block[3]
      = { (unsigned long long int)p, (unsigned long long int)n,
          0xFFFFFFFFFFFFFFFFULL};
   do {
      __asm__ __volatile__(
         "mov  x5, %0"         "\n\t"
         "ldr  x9, [x5, #0]"   "\n\t" // p
         "ldr  x10, [x5, #8]"  "\n\t" // n
         "ldxr x8, [x9]"       "\n\t"
         "add  x8, x8, x10"    "\n\t"
         "stxr w4, x8, [x9]"   "\n\t"
         "str  x4, [x5, #16]"  "\n\t"
         : /*out*/
         : /*in*/ "r"(&block[0])
         : /*trash*/ "memory", "cc", "x5", "x8", "x9", "x10", "x4"
      );
   } while (block[2] != 0);
#elif defined(VGA_s390x)
   /* 64-bit compare-and-swap (CSG) loop. */
   __asm__ __volatile__(
      "   lg	0,%0\n\t"
      "0: lgr	1,0\n\t"
      "   agr	1,%1\n\t"
      "   csg	0,1,%0\n\t"
      "   jl	0b\n\t"
      : "+m" (*p)
      : "d" (n)
      : "cc", "memory", "0", "1");
#elif defined(VGA_mips64)
   unsigned long block[3]
      = { (unsigned long)p, (unsigned long)n, 0x0ULL };
   do {
      __asm__ __volatile__(
         "move  $t0, %0"        "\n\t"
         "ld    $t1, 0($t0)"    "\n\t" // p
         "ld    $t2, 8($t0)"    "\n\t" // n
         "lld   $t3, 0($t1)"    "\n\t"
         "daddu $t3, $t3, $t2"  "\n\t"
         "scd   $t3, 0($t1)"    "\n\t"
         "sd    $t3, 16($t0)"   "\n\t"
         : /*out*/
         : /*in*/ "r"(&block[0])
         : /*trash*/ "memory", "t0", "t1", "t2", "t3"
      );
   } while (block[2] != 1);
#else
# error "Unsupported arch"
#endif
}

/* Fork two processes sharing one anonymous page; each performs NNN
   atomic increments on an 8/16/32-bit counter and NNN atomic adds of
   98765 on a 64-bit counter.  Expected finals (2*NNN = 6913974 ops):
     8-bit : 6913974 mod 256   = 182 = -74 as signed char
     16-bit: 6913974 mod 65536 = 32694
     32-bit: 6913974
     64-bit: 6913974 * 98765 = 682858642110 (or 0 where unsupported).
   Lost updates (non-atomic execution) make the values fall short. */
int main ( int argc, char** argv )
{
   int    i, status;
   char*  page;
   char*  p8;
   short* p16;
   int*   p32;
   long long int* p64;
   pid_t  child, p2;

   printf("parent, pre-fork\n");

   /* MAP_SHARED is essential: both processes must hit the same
      physical counters for the test to be meaningful. */
   page = mmap( 0, sysconf(_SC_PAGESIZE),
                PROT_READ|PROT_WRITE,
                MAP_ANONYMOUS|MAP_SHARED, -1, 0 );
   if (page == MAP_FAILED) {
      perror("mmap failed");
      exit(1);
   }

   /* Counters are spread 256 bytes apart; 8-byte alignment of each is
      required by the ppc/mips doubleword hacks above. */
   p8  = (char*)(page+0);
   p16 = (short*)(page+256);
   p32 = (int*)(page+512);
   p64 = (long long int*)(page+768);

   assert( IS_8_ALIGNED(p8) );
   assert( IS_8_ALIGNED(p16) );
   assert( IS_8_ALIGNED(p32) );
   assert( IS_8_ALIGNED(p64) );

   memset(page, 0, 1024);

   *p8  = 0;
   *p16 = 0;
   *p32 = 0;
   *p64 = 0;

   child = fork();
   if (child == -1) {
      perror("fork() failed\n");
      return 1;
   }

   if (child == 0) {
      /* --- CHILD --- */
      printf("child\n");
      for (i = 0; i < NNN; i++) {
         atomic_add_8bit(p8, 1);
         atomic_add_16bit(p16, 1);
         atomic_add_32bit(p32, 1);
         atomic_add_64bit(p64, 98765 ); /* ensure we hit the upper 32 bits */
      }
      return 1;
      /* NOTREACHED */

   }

   /* --- PARENT --- */

   printf("parent\n");

   for (i = 0; i < NNN; i++) {
      atomic_add_8bit(p8, 1);
      atomic_add_16bit(p16, 1);
      atomic_add_32bit(p32, 1);
      atomic_add_64bit(p64, 98765 ); /* ensure we hit the upper 32 bits */
   }

   p2 = waitpid(child, &status, 0);
   assert(p2 == child);

   /* assert that child finished normally */
   assert(WIFEXITED(status));

   printf("FINAL VALUES:  8 bit %d,  16 bit %d,  32 bit %d,  64 bit %lld\n",
          (int)(*(signed char*)p8), (int)(*p16), *p32, *p64 );

   if (-74 == (int)(*(signed char*)p8)
       && 32694 == (int)(*p16)
       && 6913974 == *p32
       && (0LL == *p64 || 682858642110LL == *p64)) {
      printf("PASS\n");
   }
   else {
      printf("FAIL -- see source code for expected values\n");
   }

   printf("parent exits\n");

   return 0;
}