Home | History | Annotate | Download | only in tests
      1 // This program is a thorough test of the LOADVn/STOREVn shadow memory
      2 // operations.
      4 #include <assert.h>
      5 #include <stdlib.h>
      6 #include <stdio.h>
      7 #include <string.h>
      8 #include "tests/sys_mman.h"
      9 #include "memcheck/memcheck.h"
     11 // All the sizes here are in *bytes*, not bits.
// Fixed-size unsigned integer aliases; the digit in each name is the
// intended size in bytes (NOTE(review): actual sizes depend on the target
// ABI -- confirm for any new platform).
typedef unsigned char        U1;
typedef unsigned short       U2;
typedef unsigned int         U4;
typedef unsigned long long   U8;
// Floating-point aliases, named by intended size in bytes.
typedef float                F4;
typedef double               F8;
// Unsigned long alias; main() uses it as the intermediate type when casting
// an integer constant to a pointer.
typedef unsigned long        UWord;
// Page size assumed by main() when rounding the mmap length and when
// page-aligning the address fudge.  It is an unsigned long long constant.
#define PAGE_SIZE 4096ULL
     26 // XXX: should check the error cases for SET/GET_VBITS also
     28 // For the byte 'x', build a value of 'size' bytes from that byte, eg:
     29 //   size 1 --> x
     30 //   size 2 --> xx
     31 //   size 4 --> xxxx
     32 //   size 8 --> xxxxxxxx
     33 // where the 0 bits are seen by Memcheck as defined, and the 1 bits are
     34 // seen as undefined (ie. the value of each bit matches its V bit, ie. the
     35 // resulting value is the same as its metavalue).
     36 //
     37 U8 build(int size, U1 byte)
     38 {
     39    int i;
     40    U8 mask = 0;
     41    U8 shres;
     42    U8 res = 0xffffffffffffffffULL, res2;
     43    (void)VALGRIND_MAKE_MEM_UNDEFINED(&res, 8);
     44    assert(1 == size || 2 == size || 4 == size || 8 == size);
     46    for (i = 0; i < size; i++) {
     47       mask <<= 8;
     48       mask |= (U8)byte;
     49    }
     51    res &= mask;
     53    // res is now considered partially defined, but we know exactly what its
     54    // value is (it happens to be the same as its metavalue).
     56    (void)VALGRIND_GET_VBITS(&res, &shres, 8);
     57    res2 = res;
     58    (void)VALGRIND_MAKE_MEM_DEFINED(&res2, 8);  // avoid the 'undefined' warning
     59    assert(res2 == shres);
     60    return res;
     61 }
     63 U1 make_defined ( U1 x )
     64 {
     65    volatile U1 xx = x;
     66    (void)VALGRIND_MAKE_MEM_DEFINED(&xx, 1);
     67    return xx;
     68 }
     70 void check(U1* arr, int n, char* who)
     71 {
     72    int i;
     73    U1* shadow = malloc(n);
     74    U1 arr_i;
     75    U8 sum = 0;
     76    (void)VALGRIND_GET_VBITS(arr, shadow, n);
     77    for (i = 0; i < n; i++) {
     78       arr_i = make_defined(arr[i]);
     79       if (arr_i != shadow[i]) {
     80           fprintf(stderr, "\n\nFAILURE: %s, byte %d -- "
     81                           "is 0x%x, should be 0x%x\n\n",
     82                           who, i, shadow[i], arr[i]);
     83           exit(1);
     84       }
     85       sum += (U8)arr_i;
     86    }
     87    free(shadow);
     88    printf("test passed, sum = %llu (%9.5f per byte)\n",
     89 	  sum, (F8)sum / (F8)n);
     90 }
     92 static inline U4 randomU4 ( void )
     93 {
     94    static U4 n = 0;
     95    /* From "Numerical Recipes in C" 2nd Edition */
     96    n = 1664525UL * n + 1013904223UL;
     97    return n;
     98 }
    100 static inline U1 randomU1 ( void )
    101 {
    102    return 0xFF & (randomU4() >> 13);
    103 }
// NB!  300000 is really not enough to shake out all failures.
// Increasing it by a factor of 256 is, but makes the test take
// the best part of an hour.
// N_BYTES is the size, in bytes, of the array each test run operates on.
#define N_BYTES  (300000 /* * 256 */)
// N_EVENTS is the number of random copy operations performed per test run.
#define N_EVENTS (5 * N_BYTES)
    112 void do_test_at ( U1* arr )
    113 {
    114    int i;
    116    U4 mv1 = 0, mv2 = 0, mv4 = 0, mv8 = 0, mv4f = 0, mv8f = 0;
    118    /* Fill arr with random bytes whose shadows match them. */
    119    if (0) printf("-------- arr = %p\n", arr);
    121    printf("initialising\n");
    122    for (i = 0; i < N_BYTES; i++)
    123       arr[i] = (U1)build(1, randomU1());
    125    printf("post-initialisation check\n");
    126    check(arr, N_BYTES, "after initialisation");
    128    /* Now do huge numbers of memory copies. */
    129    printf("doing copies\n");
    130    for (i = 0; i < N_EVENTS; i++) {
    131       U4 ty, src, dst;
    132       ty  = (randomU4() >> 13) % 5;
    133      tryagain:
    134       src = (randomU4() >>  1) % N_BYTES;
    135       dst = (randomU4() >>  3) % N_BYTES;
    136       switch (ty) {
    137          case 0: { // U1
    138             *(U1*)(arr+dst) = *(U1*)(arr+src);
    139 	    mv1++;
    140             break;
    141          }
    142          case 1: { // U2
    143             if (src+2 >= N_BYTES || dst+2 >= N_BYTES)
    144                goto tryagain;
    145             *(U2*)(arr+dst) = *(U2*)(arr+src);
    146 	    mv2++;
    147             break;
    148          }
    149          case 2: { // U4
    150             if (src+4 >= N_BYTES || dst+4 >= N_BYTES)
    151                goto tryagain;
    152             *(U4*)(arr+dst) = *(U4*)(arr+src);
    153 	    mv4++;
    154             break;
    155          }
    156          case 3: { // U8
    157             if (src+8 >= N_BYTES || dst+8 >= N_BYTES)
    158                goto tryagain;
    159             *(U8*)(arr+dst) = *(U8*)(arr+src);
    160 	    mv8++;
    161             break;
    162          }
    163          /* Don't bother with 32-bit floats.  These cause
    164             horrible complications, as discussed in sh-mem.c. */
    165          /*
    166          case 4: { // F4
    167             if (src+4 >= N_BYTES || dst+4 >= N_BYTES)
    168                goto tryagain;
    169             *(F4*)(arr+dst) = *(F4*)(arr+src);
    170 	    mv4f++;
    171             break;
    172          }
    173          */
    174          case 4: { // F8
    175             if (src+8 >= N_BYTES || dst+8 >= N_BYTES)
    176                goto tryagain;
    177 #if defined(__i386__)
    178 	    /* Copying via an x87 register causes the test to fail,
    179                because (I think) some obscure values that are FP
    180                denormals get changed during the copy due to the FPU
    181                normalising, or rounding, or whatever, them.  This
    182                causes them to no longer bit-for-bit match the
    183                accompanying metadata.  Yet we still need to do a
    184                genuine 8-byte load/store to test the relevant memcheck
    185                {LOADV8,STOREV8} routines.  Hence use the MMX registers
    186                instead, as copying through them should be
    187                straightforward.. */
    188             __asm__ __volatile__(
    189                "movq (%1), %%mm2\n\t"
    190                "movq %%mm2, (%0)\n\t"
    191                "emms"
    192                : : "r"(arr+dst), "r"(arr+src) : "memory"
    193             );
    194 #elif defined(__linux__) && defined(__arm__) && !defined(__aarch64__)
    195             /* On arm32, many compilers generate a 64-bit float move
    196                using two 32 bit integer registers, which completely
    197                defeats this test.  Hence force a 64-bit NEON load and
    198                store.  I guess this will break the build on non-NEON
    199                capable targets. */
    200             __asm__ __volatile__ (
    201                "vld1.64 {d7},[%0] ; vst1.64 {d7},[%1] "
    202                : : "r"(arr+src), "r"(arr+dst) : "d7","memory"
    203             );
    204 #else
    205             /* Straightforward.  On amd64, this gives a load/store of
    206                the bottom half of an xmm register.  On ppc32/64 this
    207                is a straighforward load/store of an FP register. */
    208             *(F8*)(arr+dst) = *(F8*)(arr+src);
    209 #endif
    210 	    mv8f++;
    211             break;
    212          }
    213          default:
    214 	   fprintf(stderr, "sh-mem-random: bad size\n");
    215 	   exit(0);
    216       }
    217    }
    219    printf("final check\n");
    220    check(arr, N_BYTES, "final check");
    222    printf("counts 1/2/4/8/F4/F8: %d %d %d %d %d %d\n",
    223           mv1, mv2, mv4, mv8, mv4f, mv8f);
    224 }
    228 int main(void)
    229 {
    230    U1* arr;
    232    if (0 == RUNNING_ON_VALGRIND) {
    233       fprintf(stderr, "error: this program only works when run under Valgrind\n");
    234       exit(1);
    235    }
    237    printf("-------- testing non-auxmap range --------\n");
    239    arr = malloc(N_BYTES);
    240    assert(arr);
    241    do_test_at(arr);
    242    free(arr);
    244    if (sizeof(void*) == 8) {
    245       // 64-bit platform.
    246       int tries;
    247       int nbytes_p;
    248       // (U1*)(UWord)constULL funny casting to keep gcc quiet on
    249       // 32-bit platforms
    250       U1* huge_addr = (U1*)(UWord)0x6600000000ULL;  // 408GB
    251       // Note, kernel 2.6.? on Athlon64 refuses fixed mmap requests
    252       // at above 512GB.
    254       printf("-------- testing auxmap range --------\n");
    256       nbytes_p = (N_BYTES + PAGE_SIZE) & ~(PAGE_SIZE-1);
    258       for (tries = 0; tries < 10; tries++) {
    259          arr = mmap(huge_addr, nbytes_p, PROT_READ|PROT_WRITE,
    260                     MAP_FIXED|MAP_PRIVATE|MAP_ANONYMOUS, -1,0);
    261 	 if (arr != MAP_FAILED)
    262             break;
    263 	 // hmm. fudge the address and try again.
    264          huge_addr += (randomU4() & ~(PAGE_SIZE-1));
    265       }
    267       if (tries >= 10) {
    268 	   fprintf(stderr, "sh-mem-random: can't mmap hi-mem\n");
    269 	   exit(0);
    270       }
    271       assert(arr != MAP_FAILED);
    273       do_test_at(arr);
    274    }
    276    return 0;
    278 }