Home | History | Annotate | Download | only in tests
      1 
/* This is an example of a program which does atomic memory operations
   between two processes which share a page.  Valgrind 3.4.1 and
   earlier produce incorrect answers because they do not preserve the
   atomicity of the relevant instructions in the generated code; the
   post-DCAS-merge versions of Valgrind do behave correctly. */
      7 
      8 #include <stdlib.h>
      9 #include <stdio.h>
     10 #include <string.h>
     11 #include <assert.h>
     12 #include <unistd.h>
     13 #include <sys/wait.h>
     14 #include "tests/sys_mman.h"
     15 
/* Number of increment rounds that each of the two processes (parent and
   child) applies to every shared counter. */
#define NNN 3456987

/* Evaluates to true when the low three bits of _ptr's address are all
   zero, i.e. when _ptr is aligned to an 8-byte boundary. */
#define IS_8_ALIGNED(_ptr)   (0 == (((unsigned long)(_ptr)) & 7))
     19 
     20 
     21 __attribute__((noinline)) void atomic_add_8bit ( char* p, int n )
     22 {
     23 #if defined(VGA_x86)
     24    unsigned long block[2];
     25    block[0] = (unsigned long)p;
     26    block[1] = n;
     27    __asm__ __volatile__(
     28       "movl 0(%%esi),%%eax"      "\n\t"
     29       "movl 4(%%esi),%%ebx"      "\n\t"
     30       "lock; addb %%bl,(%%eax)"  "\n"
     31       : : "S"(&block[0])/* S means "esi only" */ : "memory","cc","eax","ebx"
     32    );
     33 #elif defined(VGA_amd64)
     34    unsigned long block[2];
     35    block[0] = (unsigned long)p;
     36    block[1] = n;
     37    __asm__ __volatile__(
     38       "movq 0(%%rsi),%%rax"      "\n\t"
     39       "movq 8(%%rsi),%%rbx"      "\n\t"
     40       "lock; addb %%bl,(%%rax)"  "\n"
     41       : : "S"(&block[0])/* S means "rsi only" */ : "memory","cc","rax","rbx"
     42    );
     43 #elif defined(VGA_ppc32)
     44    /* Nasty hack.  Does correctly atomically do *p += n, but only if p
     45       is 4-aligned -- guaranteed by caller. */
     46    unsigned long success;
     47    do {
     48       __asm__ __volatile__(
     49          "lwarx  15,0,%1"    "\n\t"
     50          "add    15,15,%2"   "\n\t"
     51          "stwcx. 15,0,%1"    "\n\t"
     52          "mfcr   %0"         "\n\t"
     53          "srwi   %0,%0,29"   "\n\t"
     54          "andi.  %0,%0,1"    "\n"
     55          : /*out*/"=b"(success)
     56          : /*in*/ "b"(p), "b"(((unsigned long)n) << 24)
     57          : /*trash*/ "memory", "cc", "r15"
     58       );
     59    } while (success != 1);
     60 #elif defined(VGA_ppc64)
     61    /* Nasty hack.  Does correctly atomically do *p += n, but only if p
     62       is 8-aligned -- guaranteed by caller. */
     63    unsigned long success;
     64    do {
     65       __asm__ __volatile__(
     66          "ldarx  15,0,%1"    "\n\t"
     67          "add    15,15,%2"   "\n\t"
     68          "stdcx. 15,0,%1"    "\n\t"
     69          "mfcr   %0"         "\n\t"
     70          "srwi   %0,%0,29"   "\n\t"
     71          "andi.  %0,%0,1"    "\n"
     72          : /*out*/"=b"(success)
     73          : /*in*/ "b"(p), "b"(((unsigned long)n) << 56)
     74          : /*trash*/ "memory", "cc", "r15"
     75       );
     76    } while (success != 1);
     77 #elif defined(VGA_arm)
     78    *p += n;
     79 #else
     80 # error "Unsupported arch"
     81 #endif
     82 }
     83 
     84 
     85 __attribute__((noinline)) void atomic_add_16bit ( short* p, int n )
     86 {
     87 #if defined(VGA_x86)
     88    unsigned long block[2];
     89    block[0] = (unsigned long)p;
     90    block[1] = n;
     91    __asm__ __volatile__(
     92       "movl 0(%%esi),%%eax"      "\n\t"
     93       "movl 4(%%esi),%%ebx"      "\n\t"
     94       "lock; addw %%bx,(%%eax)"  "\n"
     95       : : "S"(&block[0])/* S means "esi only" */ : "memory","cc","eax","ebx"
     96    );
     97 #elif defined(VGA_amd64)
     98    unsigned long block[2];
     99    block[0] = (unsigned long)p;
    100    block[1] = n;
    101    __asm__ __volatile__(
    102       "movq 0(%%rsi),%%rax"      "\n\t"
    103       "movq 8(%%rsi),%%rbx"      "\n\t"
    104       "lock; addw %%bx,(%%rax)"  "\n"
    105       : : "S"(&block[0])/* S means "rsi only" */ : "memory","cc","rax","rbx"
    106    );
    107 #elif defined(VGA_ppc32)
    108    /* Nasty hack.  Does correctly atomically do *p += n, but only if p
    109       is 8-aligned -- guaranteed by caller. */
    110    unsigned long success;
    111    do {
    112       __asm__ __volatile__(
    113          "lwarx  15,0,%1"    "\n\t"
    114          "add    15,15,%2"   "\n\t"
    115          "stwcx. 15,0,%1"    "\n\t"
    116          "mfcr   %0"         "\n\t"
    117          "srwi   %0,%0,29"   "\n\t"
    118          "andi.  %0,%0,1"    "\n"
    119          : /*out*/"=b"(success)
    120          : /*in*/ "b"(p), "b"(((unsigned long)n) << 16)
    121          : /*trash*/ "memory", "cc", "r15"
    122       );
    123    } while (success != 1);
    124 #elif defined(VGA_ppc64)
    125    /* Nasty hack.  Does correctly atomically do *p += n, but only if p
    126       is 8-aligned -- guaranteed by caller. */
    127    unsigned long success;
    128    do {
    129       __asm__ __volatile__(
    130          "ldarx  15,0,%1"    "\n\t"
    131          "add    15,15,%2"   "\n\t"
    132          "stdcx. 15,0,%1"    "\n\t"
    133          "mfcr   %0"         "\n\t"
    134          "srwi   %0,%0,29"   "\n\t"
    135          "andi.  %0,%0,1"    "\n"
    136          : /*out*/"=b"(success)
    137          : /*in*/ "b"(p), "b"(((unsigned long)n) << 48)
    138          : /*trash*/ "memory", "cc", "r15"
    139       );
    140    } while (success != 1);
    141 #elif defined(VGA_arm)
    142    *p += n;
    143 #else
    144 # error "Unsupported arch"
    145 #endif
    146 }
    147 
    148 __attribute__((noinline)) void atomic_add_32bit ( int* p, int n )
    149 {
    150 #if defined(VGA_x86)
    151    unsigned long block[2];
    152    block[0] = (unsigned long)p;
    153    block[1] = n;
    154    __asm__ __volatile__(
    155       "movl 0(%%esi),%%eax"       "\n\t"
    156       "movl 4(%%esi),%%ebx"       "\n\t"
    157       "lock; addl %%ebx,(%%eax)"  "\n"
    158       : : "S"(&block[0])/* S means "esi only" */ : "memory","cc","eax","ebx"
    159    );
    160 #elif defined(VGA_amd64)
    161    unsigned long block[2];
    162    block[0] = (unsigned long)p;
    163    block[1] = n;
    164    __asm__ __volatile__(
    165       "movq 0(%%rsi),%%rax"       "\n\t"
    166       "movq 8(%%rsi),%%rbx"       "\n\t"
    167       "lock; addl %%ebx,(%%rax)"  "\n"
    168       : : "S"(&block[0])/* S means "rsi only" */ : "memory","cc","rax","rbx"
    169    );
    170 #elif defined(VGA_ppc32)
    171    unsigned long success;
    172    do {
    173       __asm__ __volatile__(
    174          "lwarx  15,0,%1"    "\n\t"
    175          "add    15,15,%2"   "\n\t"
    176          "stwcx. 15,0,%1"    "\n\t"
    177          "mfcr   %0"         "\n\t"
    178          "srwi   %0,%0,29"   "\n\t"
    179          "andi.  %0,%0,1"    "\n"
    180          : /*out*/"=b"(success)
    181          : /*in*/ "b"(p), "b"(n)
    182          : /*trash*/ "memory", "cc", "r15"
    183       );
    184    } while (success != 1);
    185 #elif defined(VGA_ppc64)
    186    /* Nasty hack.  Does correctly atomically do *p += n, but only if p
    187       is 8-aligned -- guaranteed by caller. */
    188    unsigned long success;
    189    do {
    190       __asm__ __volatile__(
    191          "ldarx  15,0,%1"    "\n\t"
    192          "add    15,15,%2"   "\n\t"
    193          "stdcx. 15,0,%1"    "\n\t"
    194          "mfcr   %0"         "\n\t"
    195          "srwi   %0,%0,29"   "\n\t"
    196          "andi.  %0,%0,1"    "\n"
    197          : /*out*/"=b"(success)
    198          : /*in*/ "b"(p), "b"(((unsigned long)n) << 32)
    199          : /*trash*/ "memory", "cc", "r15"
    200       );
    201    } while (success != 1);
    202 #elif defined(VGA_arm)
    203    unsigned int block[3]
    204       = { (unsigned int)p, (unsigned int)n, 0xFFFFFFFF };
    205    do {
    206       __asm__ __volatile__(
    207          "mov   r5, %0"         "\n\t"
    208          "ldr   r9, [r5, #0]"   "\n\t" // p
    209          "ldr   r10, [r5, #4]"  "\n\t" // n
    210          "ldrex r8, [r9]"       "\n\t"
    211          "add   r8, r8, r10"    "\n\t"
    212          "strex r11, r8, [r9]"  "\n\t"
    213          "str   r11, [r5, #8]"  "\n\t"
    214          : /*out*/
    215          : /*in*/ "r"(&block[0])
    216          : /*trash*/ "memory", "cc", "r5", "r8", "r9", "r10"
    217       );
    218    } while (block[2] != 0);
    219 #else
    220 # error "Unsupported arch"
    221 #endif
    222 }
    223 
    224 __attribute__((noinline)) void atomic_add_64bit ( long long int* p, int n )
    225 {
    226 #if defined(VGA_x86) || defined(VGA_ppc32) || defined(VGA_arm)
    227    /* do nothing; is not supported */
    228 #elif defined(VGA_amd64)
    229    // this is a bit subtle.  It relies on the fact that, on a 64-bit platform,
    230    // sizeof(unsigned long long int) == sizeof(unsigned long) == sizeof(void*)
    231    unsigned long long int block[2];
    232    block[0] = (unsigned long long int)(unsigned long)p;
    233    block[1] = n;
    234    __asm__ __volatile__(
    235       "movq 0(%%rsi),%%rax"      "\n\t"
    236       "movq 8(%%rsi),%%rbx"      "\n\t"
    237       "lock; addq %%rbx,(%%rax)" "\n"
    238       : : "S"(&block[0])/* S means "rsi only" */ : "memory","cc","rax","rbx"
    239    );
    240 #elif defined(VGA_ppc64)
    241    unsigned long success;
    242    do {
    243       __asm__ __volatile__(
    244          "ldarx  15,0,%1"    "\n\t"
    245          "add    15,15,%2"   "\n\t"
    246          "stdcx. 15,0,%1"    "\n\t"
    247          "mfcr   %0"         "\n\t"
    248          "srwi   %0,%0,29"   "\n\t"
    249          "andi.  %0,%0,1"    "\n"
    250          : /*out*/"=b"(success)
    251          : /*in*/ "b"(p), "b"(n)
    252          : /*trash*/ "memory", "cc", "r15"
    253       );
    254    } while (success != 1);
    255 #else
    256 # error "Unsupported arch"
    257 #endif
    258 }
    259 
    260 int main ( int argc, char** argv )
    261 {
    262    int    i, status;
    263    char*  page;
    264    char*  p8;
    265    short* p16;
    266    int*   p32;
    267    long long int* p64;
    268    pid_t  child, p2;
    269 
    270    printf("parent, pre-fork\n");
    271 
    272    page = mmap( 0, sysconf(_SC_PAGESIZE),
    273                    PROT_READ|PROT_WRITE,
    274                    MAP_ANONYMOUS|MAP_SHARED, -1, 0 );
    275    if (page == MAP_FAILED) {
    276       perror("mmap failed");
    277       exit(1);
    278    }
    279 
    280    p8  = (char*)(page+0);
    281    p16 = (short*)(page+256);
    282    p32 = (int*)(page+512);
    283    p64 = (long long int*)(page+768);
    284 
    285    assert( IS_8_ALIGNED(p8) );
    286    assert( IS_8_ALIGNED(p16) );
    287    assert( IS_8_ALIGNED(p32) );
    288    assert( IS_8_ALIGNED(p64) );
    289 
    290    memset(page, 0, 1024);
    291 
    292    *p8  = 0;
    293    *p16 = 0;
    294    *p32 = 0;
    295    *p64 = 0;
    296 
    297    child = fork();
    298    if (child == -1) {
    299       perror("fork() failed\n");
    300       return 1;
    301    }
    302 
    303    if (child == 0) {
    304       /* --- CHILD --- */
    305       printf("child\n");
    306       for (i = 0; i < NNN; i++) {
    307          atomic_add_8bit(p8, 1);
    308          atomic_add_16bit(p16, 1);
    309          atomic_add_32bit(p32, 1);
    310          atomic_add_64bit(p64, 98765 ); /* ensure we hit the upper 32 bits */
    311       }
    312       return 1;
    313       /* NOTREACHED */
    314 
    315    }
    316 
    317    /* --- PARENT --- */
    318 
    319    printf("parent\n");
    320 
    321    for (i = 0; i < NNN; i++) {
    322       atomic_add_8bit(p8, 1);
    323       atomic_add_16bit(p16, 1);
    324       atomic_add_32bit(p32, 1);
    325       atomic_add_64bit(p64, 98765 ); /* ensure we hit the upper 32 bits */
    326    }
    327 
    328    p2 = waitpid(child, &status, 0);
    329    assert(p2 == child);
    330 
    331    /* assert that child finished normally */
    332    assert(WIFEXITED(status));
    333 
    334    printf("FINAL VALUES:  8 bit %d,  16 bit %d,  32 bit %d,  64 bit %lld\n",
    335           (int)(*(signed char*)p8), (int)(*p16), *p32, *p64 );
    336 
    337    if (-74 == (int)(*(signed char*)p8)
    338        && 32694 == (int)(*p16)
    339        && 6913974 == *p32
    340        && (0LL == *p64 || 682858642110LL == *p64)) {
    341       printf("PASS\n");
    342    } else {
    343       printf("FAIL -- see source code for expected values\n");
    344    }
    345 
    346    printf("parent exits\n");
    347 
    348    return 0;
    349 }
    350