Home | History | Annotate | Download | only in tests
      1 
      2 /* This is an example of a program which does atomic memory operations
      3    between two processes which share a page.  Valgrind 3.4.1 and
      4    earlier produce incorrect answers because it does not preserve
      5    atomicity of the relevant instructions in the generated code; but
      6    the post-DCAS-merge versions of Valgrind do behave correctly. */
      7 
      8 /* On ARM, this can be compiled into either ARM or Thumb code, so as
      9    to test both A and T encodings of LDREX/STREX et al.  Also on ARM,
     10    it tests doubleword atomics (LDREXD, STREXD) which I don't think it
     11    does on any other platform. */
     12 
     13 #include <stdlib.h>
     14 #include <stdio.h>
     15 #include <string.h>
     16 #include <assert.h>
     17 #include <unistd.h>
     18 #include <sys/wait.h>
     19 #include "tests/sys_mman.h"
     20 
     21 #define NNN 3456987
     22 
     23 #define IS_8_ALIGNED(_ptr)   (0 == (((unsigned long)(_ptr)) & 7))
     24 
     25 
/* Atomically do *p += n for an 8-bit location, using arch-specific
   inline assembly.  The caller guarantees p is 8-aligned (asserted in
   main), which the ppc32/ppc64 word/doubleword hacks rely on. */
__attribute__((noinline)) void atomic_add_8bit ( char* p, int n )
{
#if defined(VGA_x86)
   /* p and n are passed through a 2-word block so the asm needs only
      one input register (esi); LOCK ADDB does the atomic byte add. */
   unsigned long block[2];
   block[0] = (unsigned long)p;
   block[1] = n;
   __asm__ __volatile__(
      "movl 0(%%esi),%%eax"      "\n\t"
      "movl 4(%%esi),%%ebx"      "\n\t"
      "lock; addb %%bl,(%%eax)"  "\n"
      : : "S"(&block[0])/* S means "esi only" */ : "memory","cc","eax","ebx"
   );
#elif defined(VGA_amd64)
   /* Same scheme as x86, with 64-bit registers and rsi as the block
      pointer. */
   unsigned long block[2];
   block[0] = (unsigned long)p;
   block[1] = n;
   __asm__ __volatile__(
      "movq 0(%%rsi),%%rax"      "\n\t"
      "movq 8(%%rsi),%%rbx"      "\n\t"
      "lock; addb %%bl,(%%rax)"  "\n"
      : : "S"(&block[0])/* S means "rsi only" */ : "memory","cc","rax","rbx"
   );
#elif defined(VGA_ppc32)
   /* Nasty hack.  Does correctly atomically do *p += n, but only if p
      is 4-aligned -- guaranteed by caller. */
   /* lwarx/stwcx. work on the whole word; n is pre-shifted into the
      top byte (<< 24) so the add lands on *p.  mfcr/srwi/andi.
      extract CR0[EQ] -- the stwcx. success flag -- into 'success';
      loop until the store-conditional went through. */
   unsigned long success;
   do {
      __asm__ __volatile__(
         "lwarx  15,0,%1"    "\n\t"
         "add    15,15,%2"   "\n\t"
         "stwcx. 15,0,%1"    "\n\t"
         "mfcr   %0"         "\n\t"
         "srwi   %0,%0,29"   "\n\t"
         "andi.  %0,%0,1"    "\n"
         : /*out*/"=b"(success)
         : /*in*/ "b"(p), "b"(((unsigned long)n) << 24)
         : /*trash*/ "memory", "cc", "r15"
      );
   } while (success != 1);
#elif defined(VGA_ppc64)
   /* Nasty hack.  Does correctly atomically do *p += n, but only if p
      is 8-aligned -- guaranteed by caller. */
   /* Same scheme as ppc32 but with ldarx/stdcx. on the containing
      doubleword; n is pre-shifted into the top byte (<< 56). */
   unsigned long success;
   do {
      __asm__ __volatile__(
         "ldarx  15,0,%1"    "\n\t"
         "add    15,15,%2"   "\n\t"
         "stdcx. 15,0,%1"    "\n\t"
         "mfcr   %0"         "\n\t"
         "srwi   %0,%0,29"   "\n\t"
         "andi.  %0,%0,1"    "\n"
         : /*out*/"=b"(success)
         : /*in*/ "b"(p), "b"(((unsigned long)n) << 56)
         : /*trash*/ "memory", "cc", "r15"
      );
   } while (success != 1);
#elif defined(VGA_arm)
   /* block = { p, n, status }.  ldrexb/strexb do the exclusive byte
      RMW; strexb writes 0 into r4 on success, 1 on failure, and that
      value is stored to block[2] -- hence loop while it is nonzero. */
   unsigned int block[3]
      = { (unsigned int)p, (unsigned int)n, 0xFFFFFFFF };
   do {
      __asm__ __volatile__(
         "mov    r5, %0"         "\n\t"
         "ldr    r9, [r5, #0]"   "\n\t" // p
         "ldr    r10, [r5, #4]"  "\n\t" // n
         "ldrexb r8, [r9]"       "\n\t"
         "add    r8, r8, r10"    "\n\t"
         "strexb r4, r8, [r9]"   "\n\t"
         "str    r4, [r5, #8]"   "\n\t"
         : /*out*/
         : /*in*/ "r"(&block[0])
         : /*trash*/ "memory", "cc", "r5", "r8", "r9", "r10", "r4"
      );
   } while (block[2] != 0);
#elif defined(VGA_s390x)
   /* Builds the updated byte in 'dummy' (icm/stcm move just the
      selected byte, mask 1), then uses cs (compare-and-swap) on the
      word at *p, retrying (jl 0b) while the swap fails. */
   int dummy;
   __asm__ __volatile__(
      "   l	0,%0\n\t"
      "0: st	0,%1\n\t"
      "   icm	1,1,%1\n\t"
      "   ar	1,%2\n\t"
      "   stcm  1,1,%1\n\t"
      "   l     1,%1\n\t"
      "   cs	0,1,%0\n\t"
      "   jl    0b\n\t"
      : "+m" (*p), "+m" (dummy)
      : "d" (n)
      : "cc", "memory", "0", "1");
#elif defined(VGA_mips32)
#if defined (_MIPSEL)
   /* ll/sc loop; sc writes 1 into $t3 on success, 0 on failure, and
      that lands in block[2] -- hence loop until it is 1.
      NOTE(review): the sc stores the whole 32-bit word with the three
      bytes above *p masked to zero (andi 0xFF).  That looks benign in
      this test (surrounding bytes stay zero) but is not a pure byte
      add -- confirm against upstream. */
   unsigned int block[3]
      = { (unsigned int)p, (unsigned int)n, 0xFFFFFFFF };
   do {
      __asm__ __volatile__(
         "move   $t0, %0"         "\n\t"
         "lw   $t1, 0($t0)"       "\n\t" // p
         "lw   $t2, 4($t0)"       "\n\t" // n
         "ll   $t3, 0($t1)"       "\n\t"
         "addu   $t3, $t3, $t2"   "\n\t"
         "andi   $t3, $t3, 0xFF"  "\n\t"
         "sc   $t3, 0($t1)"       "\n\t"
         "sw $t3, 8($t0)"         "\n\t"
         : /*out*/
         : /*in*/ "r"(&block[0])
         : /*trash*/ "memory", "cc", "t0", "t1", "t2", "t3"
      );
   } while (block[2] != 1);
#elif defined (_MIPSEB)
   /* Big-endian variant: after masking to the low byte, wsbh+rotr
      replicate it into the word so the byte at *p (big-endian byte 0)
      receives the sum.  Same sc-success loop as the EL case. */
   unsigned int block[3]
      = { (unsigned int)p, (unsigned int)n, 0xFFFFFFFF };
   do {
      __asm__ __volatile__(
         "move   $t0, %0"               "\n\t"
         "lw   $t1, 0($t0)"             "\n\t" // p
         "lw   $t2, 4($t0)"             "\n\t" // n
         "li   $t4, 0x000000FF"         "\n\t"
         "ll   $t3, 0($t1)"             "\n\t"
         "addu $t3, $t3, $t2"           "\n\t"
         "and  $t3, $t3, $t4"           "\n\t"
         "wsbh $t4, $t3"                "\n\t"
         "rotr $t4, $t4, 16"            "\n\t"
         "or   $t3, $t4, $t3"           "\n\t"
         "sc   $t3, 0($t1)"             "\n\t"
         "sw $t3, 8($t0)"               "\n\t"
         : /*out*/
         : /*in*/ "r"(&block[0])
         : /*trash*/ "memory", "cc", "t0", "t1", "t2", "t3", "t4"
      );
   } while (block[2] != 1);
#endif
#else
# error "Unsupported arch"
#endif
}
    159 
    160 
/* Atomically do *p += n for a 16-bit location, using arch-specific
   inline assembly.  The caller guarantees p is 8-aligned (asserted in
   main), which the ppc32/ppc64 word/doubleword hacks rely on. */
__attribute__((noinline)) void atomic_add_16bit ( short* p, int n )
{
#if defined(VGA_x86)
   /* p and n are passed through a 2-word block so the asm needs only
      one input register (esi); LOCK ADDW does the atomic halfword add. */
   unsigned long block[2];
   block[0] = (unsigned long)p;
   block[1] = n;
   __asm__ __volatile__(
      "movl 0(%%esi),%%eax"      "\n\t"
      "movl 4(%%esi),%%ebx"      "\n\t"
      "lock; addw %%bx,(%%eax)"  "\n"
      : : "S"(&block[0])/* S means "esi only" */ : "memory","cc","eax","ebx"
   );
#elif defined(VGA_amd64)
   /* Same scheme as x86, with 64-bit registers and rsi as the block
      pointer. */
   unsigned long block[2];
   block[0] = (unsigned long)p;
   block[1] = n;
   __asm__ __volatile__(
      "movq 0(%%rsi),%%rax"      "\n\t"
      "movq 8(%%rsi),%%rbx"      "\n\t"
      "lock; addw %%bx,(%%rax)"  "\n"
      : : "S"(&block[0])/* S means "rsi only" */ : "memory","cc","rax","rbx"
   );
#elif defined(VGA_ppc32)
   /* Nasty hack.  Does correctly atomically do *p += n, but only if p
      is 8-aligned -- guaranteed by caller. */
   /* lwarx/stwcx. on the containing word, with n pre-shifted into the
      top halfword (<< 16); mfcr/srwi/andi. extract the stwcx. success
      flag (CR0[EQ]) and the loop retries until it is set. */
   unsigned long success;
   do {
      __asm__ __volatile__(
         "lwarx  15,0,%1"    "\n\t"
         "add    15,15,%2"   "\n\t"
         "stwcx. 15,0,%1"    "\n\t"
         "mfcr   %0"         "\n\t"
         "srwi   %0,%0,29"   "\n\t"
         "andi.  %0,%0,1"    "\n"
         : /*out*/"=b"(success)
         : /*in*/ "b"(p), "b"(((unsigned long)n) << 16)
         : /*trash*/ "memory", "cc", "r15"
      );
   } while (success != 1);
#elif defined(VGA_ppc64)
   /* Nasty hack.  Does correctly atomically do *p += n, but only if p
      is 8-aligned -- guaranteed by caller. */
   /* Same scheme with ldarx/stdcx. on the doubleword; n pre-shifted
      into the top halfword (<< 48). */
   unsigned long success;
   do {
      __asm__ __volatile__(
         "ldarx  15,0,%1"    "\n\t"
         "add    15,15,%2"   "\n\t"
         "stdcx. 15,0,%1"    "\n\t"
         "mfcr   %0"         "\n\t"
         "srwi   %0,%0,29"   "\n\t"
         "andi.  %0,%0,1"    "\n"
         : /*out*/"=b"(success)
         : /*in*/ "b"(p), "b"(((unsigned long)n) << 48)
         : /*trash*/ "memory", "cc", "r15"
      );
   } while (success != 1);
#elif defined(VGA_arm)
   /* block = { p, n, status }.  ldrexh/strexh do the exclusive
      halfword RMW; strexh writes 0 on success, stored to block[2] --
      hence loop while it is nonzero. */
   unsigned int block[3]
      = { (unsigned int)p, (unsigned int)n, 0xFFFFFFFF };
   do {
      __asm__ __volatile__(
         "mov    r5, %0"         "\n\t"
         "ldr    r9, [r5, #0]"   "\n\t" // p
         "ldr    r10, [r5, #4]"  "\n\t" // n
         "ldrexh r8, [r9]"       "\n\t"
         "add    r8, r8, r10"    "\n\t"
         "strexh r4, r8, [r9]"   "\n\t"
         "str    r4, [r5, #8]"   "\n\t"
         : /*out*/
         : /*in*/ "r"(&block[0])
         : /*trash*/ "memory", "cc", "r5", "r8", "r9", "r10", "r4"
      );
   } while (block[2] != 0);
#elif defined(VGA_s390x)
   /* Builds the updated halfword in 'dummy' (icm/stcm with mask 3 =
      two low bytes), then cs (compare-and-swap) on the word at *p,
      retrying (jl 0b) while the swap fails. */
   int dummy;
   __asm__ __volatile__(
      "   l	0,%0\n\t"
      "0: st	0,%1\n\t"
      "   icm	1,3,%1\n\t"
      "   ar	1,%2\n\t"
      "   stcm  1,3,%1\n\t"
      "   l     1,%1\n\t"
      "   cs	0,1,%0\n\t"
      "   jl    0b\n\t"
      : "+m" (*p), "+m" (dummy)
      : "d" (n)
      : "cc", "memory", "0", "1");
#elif defined(VGA_mips32)
#if defined (_MIPSEL)
   /* ll/sc loop; sc writes 1 on success into $t3, which lands in
      block[2] -- loop until it is 1.
      NOTE(review): like the 8-bit EL case, sc stores the whole word
      with the upper halfword masked to zero (andi 0xFFFF); benign
      here since the adjacent bytes stay zero -- confirm upstream. */
   unsigned int block[3]
      = { (unsigned int)p, (unsigned int)n, 0xFFFFFFFF };
   do {
      __asm__ __volatile__(
         "move   $t0, %0"         "\n\t"
         "lw   $t1, 0($t0)"       "\n\t" // p
         "lw   $t2, 4($t0)"       "\n\t" // n
         "ll   $t3, 0($t1)"       "\n\t"
         "addu   $t3, $t3, $t2"   "\n\t"
         "andi   $t3, $t3, 0xFFFF"  "\n\t"
         "sc   $t3, 0($t1)"       "\n\t"
         "sw $t3, 8($t0)"         "\n\t"
         : /*out*/
         : /*in*/ "r"(&block[0])
         : /*trash*/ "memory", "cc", "t0", "t1", "t2", "t3"
      );
   } while (block[2] != 1);
#elif defined (_MIPSEB)
   /* NOTE(review): this branch does NOT perform an atomic add.  It
      ignores n and *p entirely and plain-stores the constant 32694 --
      the value main() expects as the final 16-bit result -- into the
      upper (big-endian first) halfword, then forces the loop exit by
      writing 1 to block[2].  Looks like a hard-coded workaround for
      this test; confirm against upstream valgrind before relying on
      it. */
   unsigned int block[3]
      = { (unsigned int)p, (unsigned int)n, 0xFFFFFFFF };
   do {
      __asm__ __volatile__(
         "move   $t0, %0"         "\n\t"
         "lw   $t1, 0($t0)"       "\n\t" // p
         "li   $t2, 32694"        "\n\t" // n
         "li   $t3, 0x1"          "\n\t"
         "sll  $t2, $t2, 16"      "\n\t"
         "sw   $t2, 0($t1)"       "\n\t"
         "sw $t3, 8($t0)"         "\n\t"
         : /*out*/
         : /*in*/ "r"(&block[0])
         : /*trash*/ "memory", "cc", "t0", "t1", "t2", "t3"
      );
   } while (block[2] != 1);
#endif
#else
# error "Unsupported arch"
#endif
}
    289 
/* Atomically do *p += n for a 32-bit location, using arch-specific
   inline assembly.  The caller guarantees p is 8-aligned (asserted in
   main), which the ppc64 doubleword hack relies on. */
__attribute__((noinline)) void atomic_add_32bit ( int* p, int n )
{
#if defined(VGA_x86)
   /* p and n are passed through a 2-word block so the asm needs only
      one input register (esi); LOCK ADDL does the atomic word add. */
   unsigned long block[2];
   block[0] = (unsigned long)p;
   block[1] = n;
   __asm__ __volatile__(
      "movl 0(%%esi),%%eax"       "\n\t"
      "movl 4(%%esi),%%ebx"       "\n\t"
      "lock; addl %%ebx,(%%eax)"  "\n"
      : : "S"(&block[0])/* S means "esi only" */ : "memory","cc","eax","ebx"
   );
#elif defined(VGA_amd64)
   /* Same scheme as x86, with 64-bit registers and rsi as the block
      pointer. */
   unsigned long block[2];
   block[0] = (unsigned long)p;
   block[1] = n;
   __asm__ __volatile__(
      "movq 0(%%rsi),%%rax"       "\n\t"
      "movq 8(%%rsi),%%rbx"       "\n\t"
      "lock; addl %%ebx,(%%rax)"  "\n"
      : : "S"(&block[0])/* S means "rsi only" */ : "memory","cc","rax","rbx"
   );
#elif defined(VGA_ppc32)
   /* Natural word size here: lwarx/stwcx. on *p directly, no shift
      needed.  mfcr/srwi/andi. extract the stwcx. success flag
      (CR0[EQ]); retry until set. */
   unsigned long success;
   do {
      __asm__ __volatile__(
         "lwarx  15,0,%1"    "\n\t"
         "add    15,15,%2"   "\n\t"
         "stwcx. 15,0,%1"    "\n\t"
         "mfcr   %0"         "\n\t"
         "srwi   %0,%0,29"   "\n\t"
         "andi.  %0,%0,1"    "\n"
         : /*out*/"=b"(success)
         : /*in*/ "b"(p), "b"(n)
         : /*trash*/ "memory", "cc", "r15"
      );
   } while (success != 1);
#elif defined(VGA_ppc64)
   /* Nasty hack.  Does correctly atomically do *p += n, but only if p
      is 8-aligned -- guaranteed by caller. */
   /* ldarx/stdcx. on the containing doubleword, with n pre-shifted
      into the upper word (<< 32). */
   unsigned long success;
   do {
      __asm__ __volatile__(
         "ldarx  15,0,%1"    "\n\t"
         "add    15,15,%2"   "\n\t"
         "stdcx. 15,0,%1"    "\n\t"
         "mfcr   %0"         "\n\t"
         "srwi   %0,%0,29"   "\n\t"
         "andi.  %0,%0,1"    "\n"
         : /*out*/"=b"(success)
         : /*in*/ "b"(p), "b"(((unsigned long)n) << 32)
         : /*trash*/ "memory", "cc", "r15"
      );
   } while (success != 1);
#elif defined(VGA_arm)
   /* block = { p, n, status }.  ldrex/strex do the exclusive word
      RMW; strex writes 0 on success, stored to block[2] -- hence loop
      while it is nonzero. */
   unsigned int block[3]
      = { (unsigned int)p, (unsigned int)n, 0xFFFFFFFF };
   do {
      __asm__ __volatile__(
         "mov   r5, %0"         "\n\t"
         "ldr   r9, [r5, #0]"   "\n\t" // p
         "ldr   r10, [r5, #4]"  "\n\t" // n
         "ldrex r8, [r9]"       "\n\t"
         "add   r8, r8, r10"    "\n\t"
         "strex r4, r8, [r9]"   "\n\t"
         "str   r4, [r5, #8]"   "\n\t"
         : /*out*/
         : /*in*/ "r"(&block[0])
         : /*trash*/ "memory", "cc", "r5", "r8", "r9", "r10", "r4"
      );
   } while (block[2] != 0);
#elif defined(VGA_s390x)
   /* Classic cs (compare-and-swap) loop on the full word: load old
      value, compute old+n, cs, retry (jl 0b) while the swap fails. */
   __asm__ __volatile__(
      "   l	0,%0\n\t"
      "0: lr	1,0\n\t"
      "   ar	1,%1\n\t"
      "   cs	0,1,%0\n\t"
      "   jl    0b\n\t"
      : "+m" (*p)
      : "d" (n)
      : "cc", "memory", "0", "1");
#elif defined(VGA_mips32)
   /* Natural word size: ll/sc loop, no masking needed.  sc writes 1
      into $t3 on success, which lands in block[2] -- loop until 1. */
   unsigned int block[3]
      = { (unsigned int)p, (unsigned int)n, 0xFFFFFFFF };
   do {
      __asm__ __volatile__(
         "move   $t0, %0"         "\n\t"
         "lw   $t1, 0($t0)"       "\n\t" // p
         "lw   $t2, 4($t0)"       "\n\t" // n
         "ll   $t3, 0($t1)"       "\n\t"
         "addu   $t3, $t3, $t2"   "\n\t"
         "sc   $t3, 0($t1)"       "\n\t"
         "sw $t3, 8($t0)"         "\n\t"
         : /*out*/
         : /*in*/ "r"(&block[0])
         : /*trash*/ "memory", "cc", "t0", "t1", "t2", "t3"
      );
   } while (block[2] != 1);
#else
# error "Unsupported arch"
#endif
}
    392 
/* Atomically do *p += n for a 64-bit location, using arch-specific
   inline assembly.  A no-op on the 32-bit platforms that have no
   doubleword atomics (x86, ppc32, mips32) -- main() accepts a final
   value of 0 for that case. */
__attribute__((noinline)) void atomic_add_64bit ( long long int* p, int n )
{
#if defined(VGA_x86) || defined(VGA_ppc32) || defined(VGA_mips32)
   /* do nothing; is not supported */
#elif defined(VGA_amd64)
   // this is a bit subtle.  It relies on the fact that, on a 64-bit platform,
   // sizeof(unsigned long long int) == sizeof(unsigned long) == sizeof(void*)
   unsigned long long int block[2];
   block[0] = (unsigned long long int)(unsigned long)p;
   block[1] = n;
   __asm__ __volatile__(
      "movq 0(%%rsi),%%rax"      "\n\t"
      "movq 8(%%rsi),%%rbx"      "\n\t"
      "lock; addq %%rbx,(%%rax)" "\n"
      : : "S"(&block[0])/* S means "rsi only" */ : "memory","cc","rax","rbx"
   );
#elif defined(VGA_ppc64)
   /* Natural doubleword size: ldarx/stdcx. on *p directly, no shift.
      mfcr/srwi/andi. extract the stdcx. success flag (CR0[EQ]);
      retry until set. */
   unsigned long success;
   do {
      __asm__ __volatile__(
         "ldarx  15,0,%1"    "\n\t"
         "add    15,15,%2"   "\n\t"
         "stdcx. 15,0,%1"    "\n\t"
         "mfcr   %0"         "\n\t"
         "srwi   %0,%0,29"   "\n\t"
         "andi.  %0,%0,1"    "\n"
         : /*out*/"=b"(success)
         : /*in*/ "b"(p), "b"(n)
         : /*trash*/ "memory", "cc", "r15"
      );
   } while (success != 1);
#elif defined(VGA_arm)
   /* block = { p, n, status }, all widened to 64 bits so the ldrd/
      ldrexd offsets line up.  strexd writes its 32-bit status into
      r1, which "str r1, [r5, #16]" stores into the low word of
      block[2] (little-endian), leaving the high word 0xFFFFFFFF --
      hence success is block[2] == 0xFFFFFFFF00000000 (status 0). */
   unsigned long long int block[3]
     = { (unsigned long long int)(unsigned long)p,
         (unsigned long long int)n,
         0xFFFFFFFFFFFFFFFFULL };
   do {
      __asm__ __volatile__(
         "mov    r5, %0"             "\n\t"
         "ldr    r8,     [r5, #0]"   "\n\t" // p
         "ldrd   r2, r3, [r5, #8]"   "\n\t" // n
         "ldrexd r0, r1, [r8]"       "\n\t"
         "adds   r2, r2, r0"         "\n\t"
         "adc    r3, r3, r1"         "\n\t"
         "strexd r1, r2, r3, [r8]"   "\n\t"
         "str    r1, [r5, #16]"      "\n\t"
         : /*out*/
         : /*in*/ "r"(&block[0])
         : /*trash*/ "memory", "cc", "r5", "r0", "r1", "r8", "r2", "r3"
      );
   } while (block[2] != 0xFFFFFFFF00000000ULL);
#elif defined(VGA_s390x)
   /* 64-bit csg (compare-and-swap) loop: load old value, compute
      old+n, csg, retry (jl 0b) while the swap fails. */
   __asm__ __volatile__(
      "   lg	0,%0\n\t"
      "0: lgr	1,0\n\t"
      "   agr	1,%1\n\t"
      "   csg	0,1,%0\n\t"
      "   jl    0b\n\t"
      : "+m" (*p)
      : "d" (n)
      : "cc", "memory", "0", "1");
#else
# error "Unsupported arch"
#endif
}
    458 
    459 int main ( int argc, char** argv )
    460 {
    461    int    i, status;
    462    char*  page;
    463    char*  p8;
    464    short* p16;
    465    int*   p32;
    466    long long int* p64;
    467    pid_t  child, p2;
    468 
    469    printf("parent, pre-fork\n");
    470 
    471    page = mmap( 0, sysconf(_SC_PAGESIZE),
    472                    PROT_READ|PROT_WRITE,
    473                    MAP_ANONYMOUS|MAP_SHARED, -1, 0 );
    474    if (page == MAP_FAILED) {
    475       perror("mmap failed");
    476       exit(1);
    477    }
    478 
    479    p8  = (char*)(page+0);
    480    p16 = (short*)(page+256);
    481    p32 = (int*)(page+512);
    482    p64 = (long long int*)(page+768);
    483 
    484    assert( IS_8_ALIGNED(p8) );
    485    assert( IS_8_ALIGNED(p16) );
    486    assert( IS_8_ALIGNED(p32) );
    487    assert( IS_8_ALIGNED(p64) );
    488 
    489    memset(page, 0, 1024);
    490 
    491    *p8  = 0;
    492    *p16 = 0;
    493    *p32 = 0;
    494    *p64 = 0;
    495 
    496    child = fork();
    497    if (child == -1) {
    498       perror("fork() failed\n");
    499       return 1;
    500    }
    501 
    502    if (child == 0) {
    503       /* --- CHILD --- */
    504       printf("child\n");
    505       for (i = 0; i < NNN; i++) {
    506          atomic_add_8bit(p8, 1);
    507          atomic_add_16bit(p16, 1);
    508          atomic_add_32bit(p32, 1);
    509          atomic_add_64bit(p64, 98765 ); /* ensure we hit the upper 32 bits */
    510       }
    511       return 1;
    512       /* NOTREACHED */
    513 
    514    }
    515 
    516    /* --- PARENT --- */
    517 
    518    printf("parent\n");
    519 
    520    for (i = 0; i < NNN; i++) {
    521       atomic_add_8bit(p8, 1);
    522       atomic_add_16bit(p16, 1);
    523       atomic_add_32bit(p32, 1);
    524       atomic_add_64bit(p64, 98765 ); /* ensure we hit the upper 32 bits */
    525    }
    526 
    527    p2 = waitpid(child, &status, 0);
    528    assert(p2 == child);
    529 
    530    /* assert that child finished normally */
    531    assert(WIFEXITED(status));
    532 
    533    printf("FINAL VALUES:  8 bit %d,  16 bit %d,  32 bit %d,  64 bit %lld\n",
    534           (int)(*(signed char*)p8), (int)(*p16), *p32, *p64 );
    535 
    536    if (-74 == (int)(*(signed char*)p8)
    537        && 32694 == (int)(*p16)
    538        && 6913974 == *p32
    539        && (0LL == *p64 || 682858642110LL == *p64)) {
    540       printf("PASS\n");
    541    } else {
    542       printf("FAIL -- see source code for expected values\n");
    543    }
    544 
    545    printf("parent exits\n");
    546 
    547    return 0;
    548 }
    549