      1 
      2 /*--------------------------------------------------------------------*/
      3 /*--- MemCheck: Maintain bitmaps of memory, tracking the           ---*/
      4 /*--- accessibility (A) and validity (V) status of each byte.      ---*/
      5 /*---                                                    mc_main.c ---*/
      6 /*--------------------------------------------------------------------*/
      7 
      8 /*
      9    This file is part of MemCheck, a heavyweight Valgrind tool for
     10    detecting memory errors.
     11 
     12    Copyright (C) 2000-2011 Julian Seward
     13       jseward (at) acm.org
     14 
     15    This program is free software; you can redistribute it and/or
     16    modify it under the terms of the GNU General Public License as
     17    published by the Free Software Foundation; either version 2 of the
     18    License, or (at your option) any later version.
     19 
     20    This program is distributed in the hope that it will be useful, but
     21    WITHOUT ANY WARRANTY; without even the implied warranty of
     22    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
     23    General Public License for more details.
     24 
     25    You should have received a copy of the GNU General Public License
     26    along with this program; if not, write to the Free Software
     27    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
     28    02111-1307, USA.
     29 
     30    The GNU General Public License is contained in the file COPYING.
     31 */
     32 
     33 #include "pub_tool_basics.h"
     34 #include "pub_tool_aspacemgr.h"
     35 #include "pub_tool_gdbserver.h"
     36 #include "pub_tool_hashtable.h"     // For mc_include.h
     37 #include "pub_tool_libcbase.h"
     38 #include "pub_tool_libcassert.h"
     39 #include "pub_tool_libcprint.h"
     40 #include "pub_tool_machine.h"
     41 #include "pub_tool_mallocfree.h"
     42 #include "pub_tool_options.h"
     43 #include "pub_tool_oset.h"
     44 #include "pub_tool_replacemalloc.h"
     45 #include "pub_tool_tooliface.h"
     46 #include "pub_tool_threadstate.h"
     47 
     48 #include "mc_include.h"
     49 #include "memcheck.h"   /* for client requests */
     50 
     51 
     52 /* We really want this frame-pointer-less on all platforms, since the
     53    helper functions are small and called very frequently.  By default
     54    on x86-linux, though, Makefile.all.am doesn't specify it, so do it
     55    here.  Requires gcc >= 4.4, unfortunately. */
     56 #if __GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 4)
     57 # pragma GCC optimize("-fomit-frame-pointer")
     58 #endif
     59 
     60 
     61 /* Set to 1 to do a little more sanity checking */
     62 #define VG_DEBUG_MEMORY 0
     63 
     64 #define DEBUG(fmt, args...) //VG_(printf)(fmt, ## args)
     65 
     66 static void ocache_sarp_Set_Origins ( Addr, UWord, UInt ); /* fwds */
     67 static void ocache_sarp_Clear_Origins ( Addr, UWord ); /* fwds */
     68 
     69 
     70 /*------------------------------------------------------------*/
     71 /*--- Fast-case knobs                                      ---*/
     72 /*------------------------------------------------------------*/
     73 
     74 // Comment these out to disable the fast cases (don't just set them to zero).
     75 
     76 #define PERF_FAST_LOADV    1
     77 #define PERF_FAST_STOREV   1
     78 
     79 #define PERF_FAST_SARP     1
     80 
     81 #define PERF_FAST_STACK    1
     82 #define PERF_FAST_STACK2   1
     83 
     84 /* Change this to 1 to enable assertions on origin tracking cache fast
     85    paths */
     86 #define OC_ENABLE_ASSERTIONS 0
     87 
     88 
     89 /*------------------------------------------------------------*/
     90 /*--- Comments on the origin tracking implementation       ---*/
     91 /*------------------------------------------------------------*/
     92 
     93 /* See detailed comment entitled
     94    AN OVERVIEW OF THE ORIGIN TRACKING IMPLEMENTATION
     95    which is contained further on in this file. */
     96 
     97 
     98 /*------------------------------------------------------------*/
     99 /*--- V bits and A bits                                    ---*/
    100 /*------------------------------------------------------------*/
    101 
    102 /* Conceptually, every byte value has 8 V bits, which track whether Memcheck
    103    thinks the corresponding value bit is defined.  And every memory byte
    104    has an A bit, which tracks whether Memcheck thinks the program can access
    105    it safely (ie. it's mapped, and has at least one of the RWX permission bits
    106    set).  So every N-bit register is shadowed with N V bits, and every memory
    107    byte is shadowed with 8 V bits and one A bit.
    108 
    109    In the implementation, we use two forms of compression (compressed V bits
    110    and distinguished secondary maps) to avoid the 9-bit-per-byte overhead
    111    for memory.
    112 
    113    Memcheck also tracks extra information about each heap block that is
    114    allocated, for detecting memory leaks and other purposes.
    115 */
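/* A small illustration (not part of the implementation) of the A/V
   distinction described above, assuming a typical malloc-backed heap:

      char* p = malloc(8);   // p[0..7] become addressable (A bits set)
                             //   but their V bits say "undefined"
      p[0] = 'x';            // p[0] is now both addressable and defined
      if (p[1] == 'y') ...   // reported: use of an uninitialised value
      free(p);               // p[0..7] become unaddressable again

   Branching on p[1] is reported because its V bits are still undefined;
   touching p[0] after the free would be an invalid access. */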
    116 
    117 /*------------------------------------------------------------*/
    118 /*--- Basic A/V bitmap representation.                     ---*/
    119 /*------------------------------------------------------------*/
    120 
    121 /* All reads and writes are checked against a memory map (a.k.a. shadow
    122    memory), which records the state of all memory in the process.
    123 
    124    On 32-bit machines the memory map is organised as follows.
    125    The top 16 bits of an address are used to index into a top-level
    126    map table, containing 65536 entries.  Each entry is a pointer to a
     127    second-level map, which records the accessibility and validity
    128    permissions for the 65536 bytes indexed by the lower 16 bits of the
    129    address.  Each byte is represented by two bits (details are below).  So
    130    each second-level map contains 16384 bytes.  This two-level arrangement
     131    conveniently divides the 4G address space into 64k lumps, each of size
     132    64k bytes.
    133 
    134    All entries in the primary (top-level) map must point to a valid
    135    secondary (second-level) map.  Since many of the 64kB chunks will
     136    have the same status for every byte -- ie. noaccess (for unused
    137    address space) or entirely addressable and defined (for code segments) --
    138    there are three distinguished secondary maps, which indicate 'noaccess',
    139    'undefined' and 'defined'.  For these uniform 64kB chunks, the primary
    140    map entry points to the relevant distinguished map.  In practice,
    141    typically more than half of the addressable memory is represented with
    142    the 'undefined' or 'defined' distinguished secondary map, so it gives a
    143    good saving.  It also lets us set the V+A bits of large address regions
    144    quickly in set_address_range_perms().
    145 
    146    On 64-bit machines it's more complicated.  If we followed the same basic
    147    scheme we'd have a four-level table which would require too many memory
    148    accesses.  So instead the top-level map table has 2^19 entries (indexed
     149    accesses.  So instead the top-level map table has 2^22 entries (indexed
     150    using bits 16..37 of the address);  this covers the bottom 256GB.  Any
     151    accesses above 256GB are handled with a slow, sparse auxiliary table.
     152    Valgrind's address space manager tries very hard to keep things below
     153    this 256GB barrier so that performance doesn't suffer too much.
    154    Note that this file has a lot of different functions for reading and
    155    writing shadow memory.  Only a couple are strictly necessary (eg.
    156    get_vabits2 and set_vabits2), most are just specialised for specific
    157    common cases to improve performance.
    158 
    159    Aside: the V+A bits are less precise than they could be -- we have no way
    160    of marking memory as read-only.  It would be great if we could add an
    161    extra state VA_BITSn_READONLY.  But then we'd have 5 different states,
    162    which requires 2.3 bits to hold, and there's no way to do that elegantly
    163    -- we'd have to double up to 4 bits of metadata per byte, which doesn't
    164    seem worth it.
    165 */
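/* Worked example (illustrative only) of the 32-bit scheme just described.
   For an address a = 0x0804A123:

      primary index    = a >> 16     = 0x0804   (selects the SecMap*)
      in-chunk offset  = a & 0xFFFF  = 0xA123   (byte within the 64kB chunk)

   so the V+A state of the byte at 0x0804A123 lives in the secondary map
   pointed to by primary_map[0x0804], in the two bits corresponding to
   offset 0xA123.  On 64-bit platforms the same idea applies, except that
   the primary map is bigger and addresses beyond it go via the auxiliary
   primary map. */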
    166 
    167 /* --------------- Basic configuration --------------- */
    168 
    169 /* Only change this.  N_PRIMARY_MAP *must* be a power of 2. */
    170 
    171 #if VG_WORDSIZE == 4
    172 
    173 /* cover the entire address space */
    174 #  define N_PRIMARY_BITS  16
    175 
    176 #else
    177 
    178 /* Just handle the first 256G fast and the rest via auxiliary
    179    primaries.  If you change this, Memcheck will assert at startup.
    180    See the definition of UNALIGNED_OR_HIGH for extensive comments. */
    181 #  define N_PRIMARY_BITS  22
    182 
    183 #endif
    184 
    185 
    186 /* Do not change this. */
    187 #define N_PRIMARY_MAP  ( ((UWord)1) << N_PRIMARY_BITS)
    188 
    189 /* Do not change this. */
    190 #define MAX_PRIMARY_ADDRESS (Addr)((((Addr)65536) * N_PRIMARY_MAP)-1)
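/* For reference, the values implied by the settings above: on 32-bit hosts
   N_PRIMARY_MAP is 2^16, so the primary map covers the whole 4GB space and
   MAX_PRIMARY_ADDRESS is 0xFFFFFFFF.  On 64-bit hosts N_PRIMARY_MAP is
   2^22, so the primary map covers 2^22 * 64kB = 2^38 bytes = 256GB and
   MAX_PRIMARY_ADDRESS is 0x3FFFFFFFFF; anything above that is handled via
   the auxiliary primary map below. */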
    191 
    192 
    193 /* --------------- Secondary maps --------------- */
    194 
    195 // Each byte of memory conceptually has an A bit, which indicates its
     196 // addressability, and 8 V bits, which indicate its definedness.
    197 //
    198 // But because very few bytes are partially defined, we can use a nice
    199 // compression scheme to reduce the size of shadow memory.  Each byte of
     200 // memory has 2 bits which indicate its state (ie. V+A bits):
    201 //
    202 //   00:  noaccess    (unaddressable but treated as fully defined)
    203 //   01:  undefined   (addressable and fully undefined)
    204 //   10:  defined     (addressable and fully defined)
    205 //   11:  partdefined (addressable and partially defined)
    206 //
    207 // In the "partdefined" case, we use a secondary table to store the V bits.
    208 // Each entry in the secondary-V-bits table maps a byte address to its 8 V
    209 // bits.
    210 //
    211 // We store the compressed V+A bits in 8-bit chunks, ie. the V+A bits for
    212 // four bytes (32 bits) of memory are in each chunk.  Hence the name
    213 // "vabits8".  This lets us get the V+A bits for four bytes at a time
    214 // easily (without having to do any shifting and/or masking), and that is a
    215 // very common operation.  (Note that although each vabits8 chunk
    216 // is 8 bits in size, it represents 32 bits of memory.)
    217 //
    218 // The representation is "inverse" little-endian... each 4 bytes of
    219 // memory is represented by a 1 byte value, where:
    220 //
    221 // - the status of byte (a+0) is held in bits [1..0]
    222 // - the status of byte (a+1) is held in bits [3..2]
    223 // - the status of byte (a+2) is held in bits [5..4]
    224 // - the status of byte (a+3) is held in bits [7..6]
    225 //
    226 // It's "inverse" because endianness normally describes a mapping from
    227 // value bits to memory addresses;  in this case the mapping is inverted.
    228 // Ie. instead of particular value bits being held in certain addresses, in
    229 // this case certain addresses are represented by particular value bits.
    230 // See insert_vabits2_into_vabits8() for an example.
    231 //
    232 // But note that we don't compress the V bits stored in registers;  they
     233 // need to be explicit to make the shadow operations possible.  Therefore
    234 // when moving values between registers and memory we need to convert
    235 // between the expanded in-register format and the compressed in-memory
     236 // format.  This isn't difficult, but it does require careful attention in a
    237 // few places.
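// A concrete decoding example (illustrative only): a vabits8 value of
// 0x99 == 10_01_10_01b, for a 4-aligned base address 'a', says that
// bytes (a+0) and (a+2) are undefined (01) while bytes (a+1) and (a+3)
// are defined (10).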
    238 
    239 // These represent eight bits of memory.
    240 #define VA_BITS2_NOACCESS     0x0      // 00b
    241 #define VA_BITS2_UNDEFINED    0x1      // 01b
    242 #define VA_BITS2_DEFINED      0x2      // 10b
    243 #define VA_BITS2_PARTDEFINED  0x3      // 11b
    244 
    245 // These represent 16 bits of memory.
    246 #define VA_BITS4_NOACCESS     0x0      // 00_00b
    247 #define VA_BITS4_UNDEFINED    0x5      // 01_01b
    248 #define VA_BITS4_DEFINED      0xa      // 10_10b
    249 
    250 // These represent 32 bits of memory.
    251 #define VA_BITS8_NOACCESS     0x00     // 00_00_00_00b
    252 #define VA_BITS8_UNDEFINED    0x55     // 01_01_01_01b
    253 #define VA_BITS8_DEFINED      0xaa     // 10_10_10_10b
    254 
    255 // These represent 64 bits of memory.
    256 #define VA_BITS16_NOACCESS    0x0000   // 00_00_00_00b x 2
    257 #define VA_BITS16_UNDEFINED   0x5555   // 01_01_01_01b x 2
    258 #define VA_BITS16_DEFINED     0xaaaa   // 10_10_10_10b x 2
    259 
    260 
    261 #define SM_CHUNKS             16384
    262 #define SM_OFF(aaa)           (((aaa) & 0xffff) >> 2)
    263 #define SM_OFF_16(aaa)        (((aaa) & 0xffff) >> 3)
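// Illustration of the offset macros (illustrative only): for
// a = 0x0804A123, SM_OFF(a) = 0xA123 >> 2 = 0x2848, the index of the
// vabits8 byte covering addresses 0x0804A120..0x0804A123.  For the
// 8-aligned address a = 0x0804A120, SM_OFF_16(a) = 0xA120 >> 3 = 0x1424,
// the index of the 16-bit chunk (accessed by casting vabits8[] to
// UShort*) covering 0x0804A120..0x0804A127.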
    264 
    265 // Paranoia:  it's critical for performance that the requested inlining
    266 // occurs.  So try extra hard.
    267 #define INLINE    inline __attribute__((always_inline))
    268 
    269 static INLINE Addr start_of_this_sm ( Addr a ) {
    270    return (a & (~SM_MASK));
    271 }
    272 static INLINE Bool is_start_of_sm ( Addr a ) {
    273    return (start_of_this_sm(a) == a);
    274 }
    275 
    276 typedef
    277    struct {
    278       UChar vabits8[SM_CHUNKS];
    279    }
    280    SecMap;
    281 
    282 // 3 distinguished secondary maps, one for no-access, one for
    283 // accessible but undefined, and one for accessible and defined.
    284 // Distinguished secondaries may never be modified.
    285 #define SM_DIST_NOACCESS   0
    286 #define SM_DIST_UNDEFINED  1
    287 #define SM_DIST_DEFINED    2
    288 
    289 static SecMap sm_distinguished[3];
    290 
    291 static INLINE Bool is_distinguished_sm ( SecMap* sm ) {
    292    return sm >= &sm_distinguished[0] && sm <= &sm_distinguished[2];
    293 }
    294 
    295 // Forward declaration
    296 static void update_SM_counts(SecMap* oldSM, SecMap* newSM);
    297 
    298 /* dist_sm points to one of our three distinguished secondaries.  Make
    299    a copy of it so that we can write to it.
    300 */
    301 static SecMap* copy_for_writing ( SecMap* dist_sm )
    302 {
    303    SecMap* new_sm;
    304    tl_assert(dist_sm == &sm_distinguished[0]
    305           || dist_sm == &sm_distinguished[1]
    306           || dist_sm == &sm_distinguished[2]);
    307 
    308    new_sm = VG_(am_shadow_alloc)(sizeof(SecMap));
    309    if (new_sm == NULL)
    310       VG_(out_of_memory_NORETURN)( "memcheck:allocate new SecMap",
    311                                    sizeof(SecMap) );
    312    VG_(memcpy)(new_sm, dist_sm, sizeof(SecMap));
    313    update_SM_counts(dist_sm, new_sm);
    314    return new_sm;
    315 }
    316 
    317 /* --------------- Stats --------------- */
    318 
    319 static Int   n_issued_SMs      = 0;
    320 static Int   n_deissued_SMs    = 0;
    321 static Int   n_noaccess_SMs    = N_PRIMARY_MAP; // start with many noaccess DSMs
    322 static Int   n_undefined_SMs   = 0;
    323 static Int   n_defined_SMs     = 0;
    324 static Int   n_non_DSM_SMs     = 0;
    325 static Int   max_noaccess_SMs  = 0;
    326 static Int   max_undefined_SMs = 0;
    327 static Int   max_defined_SMs   = 0;
    328 static Int   max_non_DSM_SMs   = 0;
    329 
    330 /* # searches initiated in auxmap_L1, and # base cmps required */
    331 static ULong n_auxmap_L1_searches  = 0;
    332 static ULong n_auxmap_L1_cmps      = 0;
    333 /* # of searches that missed in auxmap_L1 and therefore had to
    334    be handed to auxmap_L2. And the number of nodes inserted. */
    335 static ULong n_auxmap_L2_searches  = 0;
    336 static ULong n_auxmap_L2_nodes     = 0;
    337 
    338 static Int   n_sanity_cheap     = 0;
    339 static Int   n_sanity_expensive = 0;
    340 
    341 static Int   n_secVBit_nodes   = 0;
    342 static Int   max_secVBit_nodes = 0;
    343 
    344 static void update_SM_counts(SecMap* oldSM, SecMap* newSM)
    345 {
    346    if      (oldSM == &sm_distinguished[SM_DIST_NOACCESS ]) n_noaccess_SMs --;
    347    else if (oldSM == &sm_distinguished[SM_DIST_UNDEFINED]) n_undefined_SMs--;
    348    else if (oldSM == &sm_distinguished[SM_DIST_DEFINED  ]) n_defined_SMs  --;
    349    else                                                  { n_non_DSM_SMs  --;
    350                                                            n_deissued_SMs ++; }
    351 
    352    if      (newSM == &sm_distinguished[SM_DIST_NOACCESS ]) n_noaccess_SMs ++;
    353    else if (newSM == &sm_distinguished[SM_DIST_UNDEFINED]) n_undefined_SMs++;
    354    else if (newSM == &sm_distinguished[SM_DIST_DEFINED  ]) n_defined_SMs  ++;
    355    else                                                  { n_non_DSM_SMs  ++;
    356                                                            n_issued_SMs   ++; }
    357 
    358    if (n_noaccess_SMs  > max_noaccess_SMs ) max_noaccess_SMs  = n_noaccess_SMs;
    359    if (n_undefined_SMs > max_undefined_SMs) max_undefined_SMs = n_undefined_SMs;
    360    if (n_defined_SMs   > max_defined_SMs  ) max_defined_SMs   = n_defined_SMs;
    361    if (n_non_DSM_SMs   > max_non_DSM_SMs  ) max_non_DSM_SMs   = n_non_DSM_SMs;
    362 }
    363 
    364 /* --------------- Primary maps --------------- */
    365 
    366 /* The main primary map.  This covers some initial part of the address
    367    space, addresses 0 .. (N_PRIMARY_MAP << 16)-1.  The rest of it is
    368    handled using the auxiliary primary map.
    369 */
    370 static SecMap* primary_map[N_PRIMARY_MAP];
    371 
    372 
    373 /* An entry in the auxiliary primary map.  base must be a 64k-aligned
    374    value, and sm points at the relevant secondary map.  As with the
    375    main primary map, the secondary may be either a real secondary, or
    376    one of the three distinguished secondaries.  DO NOT CHANGE THIS
    377    LAYOUT: the first word has to be the key for OSet fast lookups.
    378 */
    379 typedef
    380    struct {
    381       Addr    base;
    382       SecMap* sm;
    383    }
    384    AuxMapEnt;
    385 
    386 /* Tunable parameter: How big is the L1 queue? */
    387 #define N_AUXMAP_L1 24
    388 
    389 /* Tunable parameter: How far along the L1 queue to insert
    390    entries resulting from L2 lookups? */
    391 #define AUXMAP_L1_INSERT_IX 12
    392 
    393 static struct {
    394           Addr       base;
    395           AuxMapEnt* ent; // pointer to the matching auxmap_L2 node
    396        }
    397        auxmap_L1[N_AUXMAP_L1];
    398 
    399 static OSet* auxmap_L2 = NULL;
    400 
    401 static void init_auxmap_L1_L2 ( void )
    402 {
    403    Int i;
    404    for (i = 0; i < N_AUXMAP_L1; i++) {
    405       auxmap_L1[i].base = 0;
    406       auxmap_L1[i].ent  = NULL;
    407    }
    408 
    409    tl_assert(0 == offsetof(AuxMapEnt,base));
    410    tl_assert(sizeof(Addr) == sizeof(void*));
    411    auxmap_L2 = VG_(OSetGen_Create)( /*keyOff*/  offsetof(AuxMapEnt,base),
    412                                     /*fastCmp*/ NULL,
    413                                     VG_(malloc), "mc.iaLL.1", VG_(free) );
    414 }
    415 
    416 /* Check representation invariants; if OK return NULL; else a
    417    descriptive bit of text.  Also return the number of
    418    non-distinguished secondary maps referred to from the auxiliary
    419    primary maps. */
    420 
    421 static HChar* check_auxmap_L1_L2_sanity ( Word* n_secmaps_found )
    422 {
    423    Word i, j;
    424    /* On a 32-bit platform, the L2 and L1 tables should
    425       both remain empty forever.
    426 
    427       On a 64-bit platform:
    428       In the L2 table:
    429        all .base & 0xFFFF == 0
    430        all .base > MAX_PRIMARY_ADDRESS
    431       In the L1 table:
    432        all .base & 0xFFFF == 0
    433        all (.base > MAX_PRIMARY_ADDRESS
    434             .base & 0xFFFF == 0
    435             and .ent points to an AuxMapEnt with the same .base)
    436            or
    437            (.base == 0 and .ent == NULL)
    438    */
    439    *n_secmaps_found = 0;
    440    if (sizeof(void*) == 4) {
    441       /* 32-bit platform */
    442       if (VG_(OSetGen_Size)(auxmap_L2) != 0)
    443          return "32-bit: auxmap_L2 is non-empty";
    444       for (i = 0; i < N_AUXMAP_L1; i++)
     445          if (auxmap_L1[i].base != 0 || auxmap_L1[i].ent != NULL)
     446             return "32-bit: auxmap_L1 is non-empty";
    447    } else {
    448       /* 64-bit platform */
    449       UWord elems_seen = 0;
    450       AuxMapEnt *elem, *res;
    451       AuxMapEnt key;
    452       /* L2 table */
    453       VG_(OSetGen_ResetIter)(auxmap_L2);
    454       while ( (elem = VG_(OSetGen_Next)(auxmap_L2)) ) {
    455          elems_seen++;
    456          if (0 != (elem->base & (Addr)0xFFFF))
    457             return "64-bit: nonzero .base & 0xFFFF in auxmap_L2";
    458          if (elem->base <= MAX_PRIMARY_ADDRESS)
    459             return "64-bit: .base <= MAX_PRIMARY_ADDRESS in auxmap_L2";
    460          if (elem->sm == NULL)
    461             return "64-bit: .sm in _L2 is NULL";
    462          if (!is_distinguished_sm(elem->sm))
    463             (*n_secmaps_found)++;
    464       }
    465       if (elems_seen != n_auxmap_L2_nodes)
    466          return "64-bit: disagreement on number of elems in _L2";
    467       /* Check L1-L2 correspondence */
    468       for (i = 0; i < N_AUXMAP_L1; i++) {
    469          if (auxmap_L1[i].base == 0 && auxmap_L1[i].ent == NULL)
    470             continue;
    471          if (0 != (auxmap_L1[i].base & (Addr)0xFFFF))
    472             return "64-bit: nonzero .base & 0xFFFF in auxmap_L1";
    473          if (auxmap_L1[i].base <= MAX_PRIMARY_ADDRESS)
    474             return "64-bit: .base <= MAX_PRIMARY_ADDRESS in auxmap_L1";
    475          if (auxmap_L1[i].ent == NULL)
    476             return "64-bit: .ent is NULL in auxmap_L1";
    477          if (auxmap_L1[i].ent->base != auxmap_L1[i].base)
    478             return "64-bit: _L1 and _L2 bases are inconsistent";
    479          /* Look it up in auxmap_L2. */
    480          key.base = auxmap_L1[i].base;
    481          key.sm   = 0;
    482          res = VG_(OSetGen_Lookup)(auxmap_L2, &key);
    483          if (res == NULL)
    484             return "64-bit: _L1 .base not found in _L2";
    485          if (res != auxmap_L1[i].ent)
    486             return "64-bit: _L1 .ent disagrees with _L2 entry";
    487       }
    488       /* Check L1 contains no duplicates */
    489       for (i = 0; i < N_AUXMAP_L1; i++) {
    490          if (auxmap_L1[i].base == 0)
    491             continue;
     492          for (j = i+1; j < N_AUXMAP_L1; j++) {
    493             if (auxmap_L1[j].base == 0)
    494                continue;
    495             if (auxmap_L1[j].base == auxmap_L1[i].base)
    496                return "64-bit: duplicate _L1 .base entries";
    497          }
    498       }
    499    }
    500    return NULL; /* ok */
    501 }
    502 
    503 static void insert_into_auxmap_L1_at ( Word rank, AuxMapEnt* ent )
    504 {
    505    Word i;
    506    tl_assert(ent);
    507    tl_assert(rank >= 0 && rank < N_AUXMAP_L1);
    508    for (i = N_AUXMAP_L1-1; i > rank; i--)
    509       auxmap_L1[i] = auxmap_L1[i-1];
    510    auxmap_L1[rank].base = ent->base;
    511    auxmap_L1[rank].ent  = ent;
    512 }
    513 
    514 static INLINE AuxMapEnt* maybe_find_in_auxmap ( Addr a )
    515 {
    516    AuxMapEnt  key;
    517    AuxMapEnt* res;
    518    Word       i;
    519 
    520    tl_assert(a > MAX_PRIMARY_ADDRESS);
    521    a &= ~(Addr)0xFFFF;
    522 
    523    /* First search the front-cache, which is a self-organising
    524       list containing the most popular entries. */
    525 
    526    if (LIKELY(auxmap_L1[0].base == a))
    527       return auxmap_L1[0].ent;
    528    if (LIKELY(auxmap_L1[1].base == a)) {
    529       Addr       t_base = auxmap_L1[0].base;
    530       AuxMapEnt* t_ent  = auxmap_L1[0].ent;
    531       auxmap_L1[0].base = auxmap_L1[1].base;
    532       auxmap_L1[0].ent  = auxmap_L1[1].ent;
    533       auxmap_L1[1].base = t_base;
    534       auxmap_L1[1].ent  = t_ent;
    535       return auxmap_L1[0].ent;
    536    }
    537 
    538    n_auxmap_L1_searches++;
    539 
    540    for (i = 0; i < N_AUXMAP_L1; i++) {
    541       if (auxmap_L1[i].base == a) {
    542          break;
    543       }
    544    }
    545    tl_assert(i >= 0 && i <= N_AUXMAP_L1);
    546 
    547    n_auxmap_L1_cmps += (ULong)(i+1);
    548 
    549    if (i < N_AUXMAP_L1) {
    550       if (i > 0) {
    551          Addr       t_base = auxmap_L1[i-1].base;
    552          AuxMapEnt* t_ent  = auxmap_L1[i-1].ent;
    553          auxmap_L1[i-1].base = auxmap_L1[i-0].base;
    554          auxmap_L1[i-1].ent  = auxmap_L1[i-0].ent;
    555          auxmap_L1[i-0].base = t_base;
    556          auxmap_L1[i-0].ent  = t_ent;
    557          i--;
    558       }
    559       return auxmap_L1[i].ent;
    560    }
    561 
    562    n_auxmap_L2_searches++;
    563 
    564    /* First see if we already have it. */
    565    key.base = a;
    566    key.sm   = 0;
    567 
    568    res = VG_(OSetGen_Lookup)(auxmap_L2, &key);
    569    if (res)
    570       insert_into_auxmap_L1_at( AUXMAP_L1_INSERT_IX, res );
    571    return res;
    572 }
    573 
    574 static AuxMapEnt* find_or_alloc_in_auxmap ( Addr a )
    575 {
    576    AuxMapEnt *nyu, *res;
    577 
    578    /* First see if we already have it. */
    579    res = maybe_find_in_auxmap( a );
    580    if (LIKELY(res))
    581       return res;
    582 
     583    /* Ok, there's no entry in the auxiliary map for this address,
     584       so we'll have to allocate one. */
    585    a &= ~(Addr)0xFFFF;
    586 
    587    nyu = (AuxMapEnt*) VG_(OSetGen_AllocNode)( auxmap_L2, sizeof(AuxMapEnt) );
    588    tl_assert(nyu);
    589    nyu->base = a;
    590    nyu->sm   = &sm_distinguished[SM_DIST_NOACCESS];
    591    VG_(OSetGen_Insert)( auxmap_L2, nyu );
    592    insert_into_auxmap_L1_at( AUXMAP_L1_INSERT_IX, nyu );
    593    n_auxmap_L2_nodes++;
    594    return nyu;
    595 }
    596 
    597 /* --------------- SecMap fundamentals --------------- */
    598 
    599 // In all these, 'low' means it's definitely in the main primary map,
    600 // 'high' means it's definitely in the auxiliary table.
    601 
    602 static INLINE SecMap** get_secmap_low_ptr ( Addr a )
    603 {
    604    UWord pm_off = a >> 16;
    605 #  if VG_DEBUG_MEMORY >= 1
    606    tl_assert(pm_off < N_PRIMARY_MAP);
    607 #  endif
    608    return &primary_map[ pm_off ];
    609 }
    610 
    611 static INLINE SecMap** get_secmap_high_ptr ( Addr a )
    612 {
    613    AuxMapEnt* am = find_or_alloc_in_auxmap(a);
    614    return &am->sm;
    615 }
    616 
    617 static SecMap** get_secmap_ptr ( Addr a )
    618 {
    619    return ( a <= MAX_PRIMARY_ADDRESS
    620           ? get_secmap_low_ptr(a)
    621           : get_secmap_high_ptr(a));
    622 }
    623 
    624 static INLINE SecMap* get_secmap_for_reading_low ( Addr a )
    625 {
    626    return *get_secmap_low_ptr(a);
    627 }
    628 
    629 static INLINE SecMap* get_secmap_for_reading_high ( Addr a )
    630 {
    631    return *get_secmap_high_ptr(a);
    632 }
    633 
    634 static INLINE SecMap* get_secmap_for_writing_low(Addr a)
    635 {
    636    SecMap** p = get_secmap_low_ptr(a);
    637    if (UNLIKELY(is_distinguished_sm(*p)))
    638       *p = copy_for_writing(*p);
    639    return *p;
    640 }
    641 
    642 static INLINE SecMap* get_secmap_for_writing_high ( Addr a )
    643 {
    644    SecMap** p = get_secmap_high_ptr(a);
    645    if (UNLIKELY(is_distinguished_sm(*p)))
    646       *p = copy_for_writing(*p);
    647    return *p;
    648 }
    649 
    650 /* Produce the secmap for 'a', either from the primary map or by
    651    ensuring there is an entry for it in the aux primary map.  The
    652    secmap may be a distinguished one as the caller will only want to
    653    be able to read it.
    654 */
    655 static INLINE SecMap* get_secmap_for_reading ( Addr a )
    656 {
    657    return ( a <= MAX_PRIMARY_ADDRESS
    658           ? get_secmap_for_reading_low (a)
    659           : get_secmap_for_reading_high(a) );
    660 }
    661 
    662 /* Produce the secmap for 'a', either from the primary map or by
    663    ensuring there is an entry for it in the aux primary map.  The
    664    secmap may not be a distinguished one, since the caller will want
    665    to be able to write it.  If it is a distinguished secondary, make a
    666    writable copy of it, install it, and return the copy instead.  (COW
    667    semantics).
    668 */
    669 static SecMap* get_secmap_for_writing ( Addr a )
    670 {
    671    return ( a <= MAX_PRIMARY_ADDRESS
    672           ? get_secmap_for_writing_low (a)
    673           : get_secmap_for_writing_high(a) );
    674 }
    675 
    676 /* If 'a' has a SecMap, produce it.  Else produce NULL.  But don't
    677    allocate one if one doesn't already exist.  This is used by the
    678    leak checker.
    679 */
    680 static SecMap* maybe_get_secmap_for ( Addr a )
    681 {
    682    if (a <= MAX_PRIMARY_ADDRESS) {
    683       return get_secmap_for_reading_low(a);
    684    } else {
    685       AuxMapEnt* am = maybe_find_in_auxmap(a);
    686       return am ? am->sm : NULL;
    687    }
    688 }
    689 
    690 /* --------------- Fundamental functions --------------- */
    691 
    692 static INLINE
    693 void insert_vabits2_into_vabits8 ( Addr a, UChar vabits2, UChar* vabits8 )
    694 {
    695    UInt shift =  (a & 3)  << 1;        // shift by 0, 2, 4, or 6
    696    *vabits8  &= ~(0x3     << shift);   // mask out the two old bits
    697    *vabits8  |=  (vabits2 << shift);   // mask  in the two new bits
    698 }
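// Worked example (illustrative only): suppose *vabits8 is 0x55 (all four
// bytes undefined) and we call this with vabits2 == VA_BITS2_DEFINED for
// an address with (a & 3) == 2.  Then shift == 4, the old field is
// cleared (0x55 & ~0x30 == 0x45), and 0x2 << 4 == 0x20 is OR'd back in,
// giving 0x65 == 01_10_01_01b: the byte at group offset 2 is now marked
// defined, and the other three stay undefined.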
    699 
    700 static INLINE
    701 void insert_vabits4_into_vabits8 ( Addr a, UChar vabits4, UChar* vabits8 )
    702 {
    703    UInt shift;
    704    tl_assert(VG_IS_2_ALIGNED(a));      // Must be 2-aligned
    705    shift     =  (a & 2)   << 1;        // shift by 0 or 4
    706    *vabits8 &= ~(0xf      << shift);   // mask out the four old bits
    707    *vabits8 |=  (vabits4 << shift);    // mask  in the four new bits
    708 }
    709 
    710 static INLINE
    711 UChar extract_vabits2_from_vabits8 ( Addr a, UChar vabits8 )
    712 {
    713    UInt shift = (a & 3) << 1;          // shift by 0, 2, 4, or 6
    714    vabits8 >>= shift;                  // shift the two bits to the bottom
    715    return 0x3 & vabits8;               // mask out the rest
    716 }
    717 
    718 static INLINE
    719 UChar extract_vabits4_from_vabits8 ( Addr a, UChar vabits8 )
    720 {
    721    UInt shift;
    722    tl_assert(VG_IS_2_ALIGNED(a));      // Must be 2-aligned
    723    shift = (a & 2) << 1;               // shift by 0 or 4
    724    vabits8 >>= shift;                  // shift the four bits to the bottom
    725    return 0xf & vabits8;               // mask out the rest
    726 }
    727 
    728 // Note that these four are only used in slow cases.  The fast cases do
     729 // clever things like combine the auxmap check (in
     730 // get_secmap_for_{reading,writing}) with alignment checks.
    731 
    732 // *** WARNING! ***
    733 // Any time this function is called, if it is possible that vabits2
    734 // is equal to VA_BITS2_PARTDEFINED, then the corresponding entry in the
    735 // sec-V-bits table must also be set!
    736 static INLINE
    737 void set_vabits2 ( Addr a, UChar vabits2 )
    738 {
    739    SecMap* sm       = get_secmap_for_writing(a);
    740    UWord   sm_off   = SM_OFF(a);
    741    insert_vabits2_into_vabits8( a, vabits2, &(sm->vabits8[sm_off]) );
    742 }
    743 
    744 static INLINE
    745 UChar get_vabits2 ( Addr a )
    746 {
    747    SecMap* sm       = get_secmap_for_reading(a);
    748    UWord   sm_off   = SM_OFF(a);
    749    UChar   vabits8  = sm->vabits8[sm_off];
    750    return extract_vabits2_from_vabits8(a, vabits8);
    751 }
    752 
    753 // *** WARNING! ***
    754 // Any time this function is called, if it is possible that any of the
    755 // 4 2-bit fields in vabits8 are equal to VA_BITS2_PARTDEFINED, then the
    756 // corresponding entry(s) in the sec-V-bits table must also be set!
    757 static INLINE
    758 UChar get_vabits8_for_aligned_word32 ( Addr a )
    759 {
    760    SecMap* sm       = get_secmap_for_reading(a);
    761    UWord   sm_off   = SM_OFF(a);
    762    UChar   vabits8  = sm->vabits8[sm_off];
    763    return vabits8;
    764 }
    765 
    766 static INLINE
    767 void set_vabits8_for_aligned_word32 ( Addr a, UChar vabits8 )
    768 {
    769    SecMap* sm       = get_secmap_for_writing(a);
    770    UWord   sm_off   = SM_OFF(a);
    771    sm->vabits8[sm_off] = vabits8;
    772 }
    773 
    774 
    775 // Forward declarations
    776 static UWord get_sec_vbits8(Addr a);
    777 static void  set_sec_vbits8(Addr a, UWord vbits8);
    778 
    779 // Returns False if there was an addressability error.
    780 static INLINE
    781 Bool set_vbits8 ( Addr a, UChar vbits8 )
    782 {
    783    Bool  ok      = True;
    784    UChar vabits2 = get_vabits2(a);
    785    if ( VA_BITS2_NOACCESS != vabits2 ) {
    786       // Addressable.  Convert in-register format to in-memory format.
    787       // Also remove any existing sec V bit entry for the byte if no
    788       // longer necessary.
    789       if      ( V_BITS8_DEFINED   == vbits8 ) { vabits2 = VA_BITS2_DEFINED;   }
    790       else if ( V_BITS8_UNDEFINED == vbits8 ) { vabits2 = VA_BITS2_UNDEFINED; }
    791       else                                    { vabits2 = VA_BITS2_PARTDEFINED;
    792                                                 set_sec_vbits8(a, vbits8);  }
    793       set_vabits2(a, vabits2);
    794 
    795    } else {
    796       // Unaddressable!  Do nothing -- when writing to unaddressable
    797       // memory it acts as a black hole, and the V bits can never be seen
    798       // again.  So we don't have to write them at all.
    799       ok = False;
    800    }
    801    return ok;
    802 }
    803 
    804 // Returns False if there was an addressability error.  In that case, we put
    805 // all defined bits into vbits8.
    806 static INLINE
    807 Bool get_vbits8 ( Addr a, UChar* vbits8 )
    808 {
    809    Bool  ok      = True;
    810    UChar vabits2 = get_vabits2(a);
    811 
    812    // Convert the in-memory format to in-register format.
    813    if      ( VA_BITS2_DEFINED   == vabits2 ) { *vbits8 = V_BITS8_DEFINED;   }
    814    else if ( VA_BITS2_UNDEFINED == vabits2 ) { *vbits8 = V_BITS8_UNDEFINED; }
    815    else if ( VA_BITS2_NOACCESS  == vabits2 ) {
    816       *vbits8 = V_BITS8_DEFINED;    // Make V bits defined!
    817       ok = False;
    818    } else {
    819       tl_assert( VA_BITS2_PARTDEFINED == vabits2 );
    820       *vbits8 = get_sec_vbits8(a);
    821    }
    822    return ok;
    823 }
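// Illustration of the two conversions above (assuming the usual encoding
// from mc_include.h, where a V bit of 1 means "undefined", so
// V_BITS8_DEFINED is 0x00 and V_BITS8_UNDEFINED is 0xFF):
//   set_vbits8(a, 0x00) -> vabits2 = VA_BITS2_DEFINED     (no sec-V node)
//   set_vbits8(a, 0xFF) -> vabits2 = VA_BITS2_UNDEFINED   (no sec-V node)
//   set_vbits8(a, 0x0F) -> vabits2 = VA_BITS2_PARTDEFINED and the exact
//                          pattern 0x0F (low 4 bits undefined) is stored
//                          in the sec-V-bits table via set_sec_vbits8().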
    824 
    825 
    826 /* --------------- Secondary V bit table ------------ */
    827 
    828 // This table holds the full V bit pattern for partially-defined bytes
    829 // (PDBs) that are represented by VA_BITS2_PARTDEFINED in the main shadow
    830 // memory.
    831 //
    832 // Note: the nodes in this table can become stale.  Eg. if you write a PDB,
    833 // then overwrite the same address with a fully defined byte, the sec-V-bit
    834 // node will not necessarily be removed.  This is because checking for
    835 // whether removal is necessary would slow down the fast paths.
    836 //
    837 // To avoid the stale nodes building up too much, we periodically (once the
    838 // table reaches a certain size) garbage collect (GC) the table by
    839 // traversing it and evicting any "sufficiently stale" nodes, ie. nodes that
    840 // are stale and haven't been touched for a certain number of collections.
    841 // If more than a certain proportion of nodes survived, we increase the
    842 // table size so that GCs occur less often.
    843 //
     844 // (So this is a bit different from a traditional GC, where you definitely
     845 // want to remove any dead nodes.  It's more like we have a resizable cache
     846 // and we're trying to find the right balance between how many elements to
     847 // evict and how big to make the cache.)
    848 //
    849 // This policy is designed to avoid bad table bloat in the worst case where
    850 // a program creates huge numbers of stale PDBs -- we would get this bloat
    851 // if we had no GC -- while handling well the case where a node becomes
    852 // stale but shortly afterwards is rewritten with a PDB and so becomes
    853 // non-stale again (which happens quite often, eg. in perf/bz2).  If we just
    854 // remove all stale nodes as soon as possible, we just end up re-adding a
     855 // lot of them again later.  The "sufficiently stale" approach avoids
    856 // this.  (If a program has many live PDBs, performance will just suck,
    857 // there's no way around that.)
    858 
    859 static OSet* secVBitTable;
    860 
    861 // Stats
    862 static ULong sec_vbits_new_nodes = 0;
    863 static ULong sec_vbits_updates   = 0;
    864 
    865 // This must be a power of two;  this is checked in mc_pre_clo_init().
    866 // The size chosen here is a trade-off:  if the nodes are bigger (ie. cover
    867 // a larger address range) they take more space but we can get multiple
     868 // partially-defined bytes in one node if they are close to each other, reducing
    869 // the number of total nodes.  In practice sometimes they are clustered (eg.
    870 // perf/bz2 repeatedly writes then reads more than 20,000 in a contiguous
    871 // row), but often not.  So we choose something intermediate.
    872 #define BYTES_PER_SEC_VBIT_NODE     16
    873 
    874 // We make the table bigger if more than this many nodes survive a GC.
    875 #define MAX_SURVIVOR_PROPORTION  0.5
    876 
    877 // Each time we make the table bigger, we increase it by this much.
    878 #define TABLE_GROWTH_FACTOR      2
    879 
    880 // This defines "sufficiently stale" -- any node that hasn't been touched in
    881 // this many GCs will be removed.
    882 #define MAX_STALE_AGE            2
    883 
    884 // We GC the table when it gets this many nodes in it, ie. it's effectively
    885 // the table size.  It can change.
    886 static Int  secVBitLimit = 1024;
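// Putting the parameters together (illustrative): the first GC happens
// when the table reaches 1024 nodes.  A node is evicted only if none of
// its 16 bytes is currently PARTDEFINED and it was last touched more
// than 2 GCs ago.  If more than 1024 * 0.5 = 512 nodes survive, the
// limit doubles to 2048, so subsequent GCs happen less often.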
    887 
    888 // The number of GCs done, used to age sec-V-bit nodes for eviction.
    889 // Because it's unsigned, wrapping doesn't matter -- the right answer will
    890 // come out anyway.
    891 static UInt GCs_done = 0;
    892 
    893 typedef
    894    struct {
    895       Addr  a;
    896       UChar vbits8[BYTES_PER_SEC_VBIT_NODE];
    897       UInt  last_touched;
    898    }
    899    SecVBitNode;
    900 
    901 static OSet* createSecVBitTable(void)
    902 {
    903    return VG_(OSetGen_Create)( offsetof(SecVBitNode, a),
    904                                NULL, // use fast comparisons
    905                                VG_(malloc), "mc.cSVT.1 (sec VBit table)",
    906                                VG_(free) );
    907 }
    908 
    909 static void gcSecVBitTable(void)
    910 {
    911    OSet*        secVBitTable2;
    912    SecVBitNode* n;
    913    Int          i, n_nodes = 0, n_survivors = 0;
    914 
    915    GCs_done++;
    916 
    917    // Create the new table.
    918    secVBitTable2 = createSecVBitTable();
    919 
    920    // Traverse the table, moving fresh nodes into the new table.
    921    VG_(OSetGen_ResetIter)(secVBitTable);
    922    while ( (n = VG_(OSetGen_Next)(secVBitTable)) ) {
    923       Bool keep = False;
    924       if ( (GCs_done - n->last_touched) <= MAX_STALE_AGE ) {
    925          // Keep node if it's been touched recently enough (regardless of
    926          // freshness/staleness).
    927          keep = True;
    928       } else {
    929          // Keep node if any of its bytes are non-stale.  Using
    930          // get_vabits2() for the lookup is not very efficient, but I don't
    931          // think it matters.
    932          for (i = 0; i < BYTES_PER_SEC_VBIT_NODE; i++) {
    933             if (VA_BITS2_PARTDEFINED == get_vabits2(n->a + i)) {
    934                keep = True;      // Found a non-stale byte, so keep
    935                break;
    936             }
    937          }
    938       }
    939 
    940       if ( keep ) {
    941          // Insert a copy of the node into the new table.
    942          SecVBitNode* n2 =
    943             VG_(OSetGen_AllocNode)(secVBitTable2, sizeof(SecVBitNode));
    944          *n2 = *n;
    945          VG_(OSetGen_Insert)(secVBitTable2, n2);
    946       }
    947    }
    948 
    949    // Get the before and after sizes.
    950    n_nodes     = VG_(OSetGen_Size)(secVBitTable);
    951    n_survivors = VG_(OSetGen_Size)(secVBitTable2);
    952 
    953    // Destroy the old table, and put the new one in its place.
    954    VG_(OSetGen_Destroy)(secVBitTable);
    955    secVBitTable = secVBitTable2;
    956 
    957    if (VG_(clo_verbosity) > 1) {
    958       Char percbuf[6];
    959       VG_(percentify)(n_survivors, n_nodes, 1, 6, percbuf);
    960       VG_(message)(Vg_DebugMsg, "memcheck GC: %d nodes, %d survivors (%s)\n",
    961                    n_nodes, n_survivors, percbuf);
    962    }
    963 
    964    // Increase table size if necessary.
    965    if (n_survivors > (secVBitLimit * MAX_SURVIVOR_PROPORTION)) {
    966       secVBitLimit *= TABLE_GROWTH_FACTOR;
    967       if (VG_(clo_verbosity) > 1)
    968          VG_(message)(Vg_DebugMsg, "memcheck GC: increase table size to %d\n",
    969                       secVBitLimit);
    970    }
    971 }
    972 
    973 static UWord get_sec_vbits8(Addr a)
    974 {
    975    Addr         aAligned = VG_ROUNDDN(a, BYTES_PER_SEC_VBIT_NODE);
    976    Int          amod     = a % BYTES_PER_SEC_VBIT_NODE;
    977    SecVBitNode* n        = VG_(OSetGen_Lookup)(secVBitTable, &aAligned);
    978    UChar        vbits8;
    979    tl_assert2(n, "get_sec_vbits8: no node for address %p (%p)\n", aAligned, a);
    980    // Shouldn't be fully defined or fully undefined -- those cases shouldn't
    981    // make it to the secondary V bits table.
    982    vbits8 = n->vbits8[amod];
    983    tl_assert(V_BITS8_DEFINED != vbits8 && V_BITS8_UNDEFINED != vbits8);
    984    return vbits8;
    985 }
    986 
    987 static void set_sec_vbits8(Addr a, UWord vbits8)
    988 {
    989    Addr         aAligned = VG_ROUNDDN(a, BYTES_PER_SEC_VBIT_NODE);
    990    Int          i, amod  = a % BYTES_PER_SEC_VBIT_NODE;
    991    SecVBitNode* n        = VG_(OSetGen_Lookup)(secVBitTable, &aAligned);
    992    // Shouldn't be fully defined or fully undefined -- those cases shouldn't
    993    // make it to the secondary V bits table.
    994    tl_assert(V_BITS8_DEFINED != vbits8 && V_BITS8_UNDEFINED != vbits8);
    995    if (n) {
    996       n->vbits8[amod] = vbits8;     // update
    997       n->last_touched = GCs_done;
    998       sec_vbits_updates++;
    999    } else {
   1000       // New node:  assign the specific byte, make the rest invalid (they
   1001       // should never be read as-is, but be cautious).
   1002       n = VG_(OSetGen_AllocNode)(secVBitTable, sizeof(SecVBitNode));
   1003       n->a            = aAligned;
   1004       for (i = 0; i < BYTES_PER_SEC_VBIT_NODE; i++) {
   1005          n->vbits8[i] = V_BITS8_UNDEFINED;
   1006       }
   1007       n->vbits8[amod] = vbits8;
   1008       n->last_touched = GCs_done;
   1009 
   1010       // Do a table GC if necessary.  Nb: do this before inserting the new
   1011       // node, to avoid erroneously GC'ing the new node.
   1012       if (secVBitLimit == VG_(OSetGen_Size)(secVBitTable)) {
   1013          gcSecVBitTable();
   1014       }
   1015 
   1016       // Insert the new node.
   1017       VG_(OSetGen_Insert)(secVBitTable, n);
   1018       sec_vbits_new_nodes++;
   1019 
   1020       n_secVBit_nodes = VG_(OSetGen_Size)(secVBitTable);
   1021       if (n_secVBit_nodes > max_secVBit_nodes)
   1022          max_secVBit_nodes = n_secVBit_nodes;
   1023    }
   1024 }
   1025 
   1026 /* --------------- Endianness helpers --------------- */
   1027 
    1028 /* Returns the offset in memory of the byteno-th least significant byte
   1029    in a wordszB-sized word, given the specified endianness. */
   1030 static INLINE UWord byte_offset_w ( UWord wordszB, Bool bigendian,
   1031                                     UWord byteno ) {
   1032    return bigendian ? (wordszB-1-byteno) : byteno;
   1033 }
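/* For example, for a 4-byte word at address a:
     little-endian: byteno 0,1,2,3 -> offsets 0,1,2,3  (LSB at a+0)
     big-endian:    byteno 0,1,2,3 -> offsets 3,2,1,0  (LSB at a+3)
   In both cases byteno counts up from the least significant byte. */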
   1034 
   1035 
   1036 /* --------------- Ignored address ranges --------------- */
   1037 
   1038 #define M_IGNORE_RANGES 4
   1039 
   1040 typedef
   1041    struct {
   1042       Int  used;
   1043       Addr start[M_IGNORE_RANGES];
   1044       Addr end[M_IGNORE_RANGES];
   1045    }
   1046    IgnoreRanges;
   1047 
   1048 static IgnoreRanges ignoreRanges;
   1049 
   1050 INLINE Bool MC_(in_ignored_range) ( Addr a )
   1051 {
   1052    Int i;
   1053    if (LIKELY(ignoreRanges.used == 0))
   1054       return False;
   1055    for (i = 0; i < ignoreRanges.used; i++) {
   1056       if (a >= ignoreRanges.start[i] && a < ignoreRanges.end[i])
   1057          return True;
   1058    }
   1059    return False;
   1060 }
   1061 
   1062 /* Parse two Addr separated by a dash, or fail. */
   1063 
   1064 static Bool parse_range ( UChar** ppc, Addr* result1, Addr* result2 )
   1065 {
   1066    Bool ok = VG_(parse_Addr) (ppc, result1);
   1067    if (!ok)
   1068       return False;
   1069    if (**ppc != '-')
   1070       return False;
   1071    (*ppc)++;
   1072    ok = VG_(parse_Addr) (ppc, result2);
   1073    if (!ok)
   1074       return False;
   1075    return True;
   1076 }
   1077 
   1078 /* Parse a set of ranges separated by commas into 'ignoreRanges', or
   1079    fail. */
   1080 
   1081 static Bool parse_ignore_ranges ( UChar* str0 )
   1082 {
   1083    Addr start, end;
   1084    Bool ok;
   1085    UChar*  str = str0;
   1086    UChar** ppc = &str;
   1087    ignoreRanges.used = 0;
   1088    while (1) {
   1089       ok = parse_range(ppc, &start, &end);
   1090       if (!ok)
   1091          return False;
   1092       if (ignoreRanges.used >= M_IGNORE_RANGES)
   1093          return False;
   1094       ignoreRanges.start[ignoreRanges.used] = start;
   1095       ignoreRanges.end[ignoreRanges.used] = end;
   1096       ignoreRanges.used++;
   1097       if (**ppc == 0)
   1098          return True;
   1099       if (**ppc != ',')
   1100          return False;
   1101       (*ppc)++;
   1102    }
   1103    /*NOTREACHED*/
   1104    return False;
   1105 }
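/* For example (illustrative), the string
      "0x50000000-0x50001000,0x70000000-0x70010000"
   fills ignoreRanges with two ranges.  Note that each end address is
   exclusive, since MC_(in_ignored_range) tests a < end.  The string would
   typically come from a command-line option such as --ignore-ranges. */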
   1106 
   1107 
   1108 /* --------------- Load/store slow cases. --------------- */
   1109 
   1110 static
   1111 __attribute__((noinline))
   1112 ULong mc_LOADVn_slow ( Addr a, SizeT nBits, Bool bigendian )
   1113 {
   1114    /* Make up a 64-bit result V word, which contains the loaded data for
   1115       valid addresses and Defined for invalid addresses.  Iterate over
   1116       the bytes in the word, from the most significant down to the
   1117       least. */
   1118    ULong vbits64     = V_BITS64_UNDEFINED;
   1119    SizeT szB         = nBits / 8;
   1120    SSizeT i;                        // Must be signed.
   1121    SizeT n_addrs_bad = 0;
   1122    Addr  ai;
   1123    Bool  partial_load_exemption_applies;
   1124    UChar vbits8;
   1125    Bool  ok;
   1126 
   1127    PROF_EVENT(30, "mc_LOADVn_slow");
   1128 
   1129    /* ------------ BEGIN semi-fast cases ------------ */
   1130    /* These deal quickly-ish with the common auxiliary primary map
    1131       cases on 64-bit platforms.  They are merely a speedup hack; they can be
   1132       omitted without loss of correctness/functionality.  Note that in
   1133       both cases the "sizeof(void*) == 8" causes these cases to be
   1134       folded out by compilers on 32-bit platforms.  These are derived
   1135       from LOADV64 and LOADV32.
   1136    */
   1137    if (LIKELY(sizeof(void*) == 8
   1138                       && nBits == 64 && VG_IS_8_ALIGNED(a))) {
   1139       SecMap* sm       = get_secmap_for_reading(a);
   1140       UWord   sm_off16 = SM_OFF_16(a);
   1141       UWord   vabits16 = ((UShort*)(sm->vabits8))[sm_off16];
   1142       if (LIKELY(vabits16 == VA_BITS16_DEFINED))
   1143          return V_BITS64_DEFINED;
   1144       if (LIKELY(vabits16 == VA_BITS16_UNDEFINED))
   1145          return V_BITS64_UNDEFINED;
   1146       /* else fall into the slow case */
   1147    }
   1148    if (LIKELY(sizeof(void*) == 8
   1149                       && nBits == 32 && VG_IS_4_ALIGNED(a))) {
   1150       SecMap* sm = get_secmap_for_reading(a);
   1151       UWord sm_off = SM_OFF(a);
   1152       UWord vabits8 = sm->vabits8[sm_off];
   1153       if (LIKELY(vabits8 == VA_BITS8_DEFINED))
   1154          return ((UWord)0xFFFFFFFF00000000ULL | (UWord)V_BITS32_DEFINED);
   1155       if (LIKELY(vabits8 == VA_BITS8_UNDEFINED))
   1156          return ((UWord)0xFFFFFFFF00000000ULL | (UWord)V_BITS32_UNDEFINED);
   1157       /* else fall into slow case */
   1158    }
   1159    /* ------------ END semi-fast cases ------------ */
   1160 
   1161    tl_assert(nBits == 64 || nBits == 32 || nBits == 16 || nBits == 8);
   1162 
   1163    for (i = szB-1; i >= 0; i--) {
   1164       PROF_EVENT(31, "mc_LOADVn_slow(loop)");
   1165       ai = a + byte_offset_w(szB, bigendian, i);
   1166       ok = get_vbits8(ai, &vbits8);
   1167       if (!ok) n_addrs_bad++;
   1168       vbits64 <<= 8;
   1169       vbits64 |= vbits8;
   1170    }
   1171 
   1172    /* This is a hack which avoids producing errors for code which
    1173       insists on stepping along byte strings in aligned word-sized
    1174       chunks even when there is a partially defined word at the end
    1175       (eg, optimised strlen).  Such code is basically broken, at least
    1176       WRT the semantics of ANSI C, but sometimes users don't have the
    1177       option of fixing it, so this flag is provided.  Note that it now
    1178       defaults to not-engaged.
   1179 
    1180       A load from a partially-addressable place is allowed if:
   1181       - the command-line flag is set
   1182       - it's a word-sized, word-aligned load
   1183       - at least one of the addresses in the word *is* valid
   1184    */
   1185    partial_load_exemption_applies
   1186       = MC_(clo_partial_loads_ok) && szB == VG_WORDSIZE
   1187                                    && VG_IS_WORD_ALIGNED(a)
   1188                                    && n_addrs_bad < VG_WORDSIZE;
   1189 
   1190    if (n_addrs_bad > 0 && !partial_load_exemption_applies)
   1191       MC_(record_address_error)( VG_(get_running_tid)(), a, szB, False );
   1192 
   1193    return vbits64;
   1194 }
   1195 
   1196 
   1197 static
   1198 __attribute__((noinline))
   1199 void mc_STOREVn_slow ( Addr a, SizeT nBits, ULong vbytes, Bool bigendian )
   1200 {
   1201    SizeT szB = nBits / 8;
   1202    SizeT i, n_addrs_bad = 0;
   1203    UChar vbits8;
   1204    Addr  ai;
   1205    Bool  ok;
   1206 
   1207    PROF_EVENT(35, "mc_STOREVn_slow");
   1208 
   1209    /* ------------ BEGIN semi-fast cases ------------ */
   1210    /* These deal quickly-ish with the common auxiliary primary map
    1211       cases on 64-bit platforms.  They are merely a speedup hack; they can be
   1212       omitted without loss of correctness/functionality.  Note that in
   1213       both cases the "sizeof(void*) == 8" causes these cases to be
   1214       folded out by compilers on 32-bit platforms.  These are derived
   1215       from STOREV64 and STOREV32.
   1216    */
   1217    if (LIKELY(sizeof(void*) == 8
   1218                       && nBits == 64 && VG_IS_8_ALIGNED(a))) {
   1219       SecMap* sm       = get_secmap_for_reading(a);
   1220       UWord   sm_off16 = SM_OFF_16(a);
   1221       UWord   vabits16 = ((UShort*)(sm->vabits8))[sm_off16];
   1222       if (LIKELY( !is_distinguished_sm(sm) &&
   1223                           (VA_BITS16_DEFINED   == vabits16 ||
   1224                            VA_BITS16_UNDEFINED == vabits16) )) {
   1225          /* Handle common case quickly: a is suitably aligned, */
    1226          /* is mapped, and is addressable. */
   1227          // Convert full V-bits in register to compact 2-bit form.
   1228          if (LIKELY(V_BITS64_DEFINED == vbytes)) {
   1229             ((UShort*)(sm->vabits8))[sm_off16] = (UShort)VA_BITS16_DEFINED;
   1230             return;
   1231          } else if (V_BITS64_UNDEFINED == vbytes) {
   1232             ((UShort*)(sm->vabits8))[sm_off16] = (UShort)VA_BITS16_UNDEFINED;
   1233             return;
   1234          }
   1235          /* else fall into the slow case */
   1236       }
   1237       /* else fall into the slow case */
   1238    }
   1239    if (LIKELY(sizeof(void*) == 8
   1240                       && nBits == 32 && VG_IS_4_ALIGNED(a))) {
   1241       SecMap* sm      = get_secmap_for_reading(a);
   1242       UWord   sm_off  = SM_OFF(a);
   1243       UWord   vabits8 = sm->vabits8[sm_off];
   1244       if (LIKELY( !is_distinguished_sm(sm) &&
   1245                           (VA_BITS8_DEFINED   == vabits8 ||
   1246                            VA_BITS8_UNDEFINED == vabits8) )) {
   1247          /* Handle common case quickly: a is suitably aligned, */
    1248          /* is mapped, and is addressable. */
   1249          // Convert full V-bits in register to compact 2-bit form.
   1250          if (LIKELY(V_BITS32_DEFINED == (vbytes & 0xFFFFFFFF))) {
   1251             sm->vabits8[sm_off] = VA_BITS8_DEFINED;
   1252             return;
   1253          } else if (V_BITS32_UNDEFINED == (vbytes & 0xFFFFFFFF)) {
   1254             sm->vabits8[sm_off] = VA_BITS8_UNDEFINED;
   1255             return;
   1256          }
   1257          /* else fall into the slow case */
   1258       }
   1259       /* else fall into the slow case */
   1260    }
   1261    /* ------------ END semi-fast cases ------------ */
   1262 
   1263    tl_assert(nBits == 64 || nBits == 32 || nBits == 16 || nBits == 8);
   1264 
   1265    /* Dump vbytes in memory, iterating from least to most significant
    1266       byte.  At the same time establish addressability of the location. */
   1267    for (i = 0; i < szB; i++) {
   1268       PROF_EVENT(36, "mc_STOREVn_slow(loop)");
   1269       ai     = a + byte_offset_w(szB, bigendian, i);
   1270       vbits8 = vbytes & 0xff;
   1271       ok     = set_vbits8(ai, vbits8);
   1272       if (!ok) n_addrs_bad++;
   1273       vbytes >>= 8;
   1274    }
   1275 
   1276    /* If an address error has happened, report it. */
   1277    if (n_addrs_bad > 0)
   1278       MC_(record_address_error)( VG_(get_running_tid)(), a, szB, True );
   1279 }
   1280 
   1281 
   1282 /*------------------------------------------------------------*/
   1283 /*--- Setting permissions over address ranges.             ---*/
   1284 /*------------------------------------------------------------*/
   1285 
   1286 static void set_address_range_perms ( Addr a, SizeT lenT, UWord vabits16,
   1287                                       UWord dsm_num )
   1288 {
   1289    UWord    sm_off, sm_off16;
   1290    UWord    vabits2 = vabits16 & 0x3;
   1291    SizeT    lenA, lenB, len_to_next_secmap;
   1292    Addr     aNext;
   1293    SecMap*  sm;
   1294    SecMap** sm_ptr;
   1295    SecMap*  example_dsm;
   1296 
   1297    PROF_EVENT(150, "set_address_range_perms");
   1298 
   1299    /* Check the V+A bits make sense. */
   1300    tl_assert(VA_BITS16_NOACCESS  == vabits16 ||
   1301              VA_BITS16_UNDEFINED == vabits16 ||
   1302              VA_BITS16_DEFINED   == vabits16);
   1303 
   1304    // This code should never write PDBs;  ensure this.  (See comment above
   1305    // set_vabits2().)
   1306    tl_assert(VA_BITS2_PARTDEFINED != vabits2);
   1307 
   1308    if (lenT == 0)
   1309       return;
   1310 
   1311    if (lenT > 256 * 1024 * 1024) {
   1312       if (VG_(clo_verbosity) > 0 && !VG_(clo_xml)) {
   1313          Char* s = "unknown???";
   1314          if (vabits16 == VA_BITS16_NOACCESS ) s = "noaccess";
   1315          if (vabits16 == VA_BITS16_UNDEFINED) s = "undefined";
   1316          if (vabits16 == VA_BITS16_DEFINED  ) s = "defined";
   1317          VG_(message)(Vg_UserMsg, "Warning: set address range perms: "
   1318                                   "large range [0x%lx, 0x%lx) (%s)\n",
   1319                                   a, a + lenT, s);
   1320       }
   1321    }
   1322 
   1323 #ifndef PERF_FAST_SARP
   1324    /*------------------ debug-only case ------------------ */
   1325    {
   1326       // Endianness doesn't matter here because all bytes are being set to
   1327       // the same value.
   1328       // Nb: We don't have to worry about updating the sec-V-bits table
   1329       // after these set_vabits2() calls because this code never writes
   1330       // VA_BITS2_PARTDEFINED values.
   1331       SizeT i;
   1332       for (i = 0; i < lenT; i++) {
   1333          set_vabits2(a + i, vabits2);
   1334       }
   1335       return;
   1336    }
   1337 #endif
   1338 
   1339    /*------------------ standard handling ------------------ */
   1340 
   1341    /* Get the distinguished secondary that we might want
   1342       to use (part of the space-compression scheme). */
   1343    example_dsm = &sm_distinguished[dsm_num];
   1344 
   1345    // We have to handle ranges covering various combinations of partial and
   1346    // whole sec-maps.  Here is how parts 1, 2 and 3 are used in each case.
   1347    // Cases marked with a '*' are common.
   1348    //
   1349    //   TYPE                                             PARTS USED
   1350    //   ----                                             ----------
   1351    // * one partial sec-map                  (p)         1
   1352    // - one whole sec-map                    (P)         2
   1353    //
   1354    // * two partial sec-maps                 (pp)        1,3
   1355    // - one partial, one whole sec-map       (pP)        1,2
   1356    // - one whole, one partial sec-map       (Pp)        2,3
   1357    // - two whole sec-maps                   (PP)        2,2
   1358    //
   1359    // * one partial, one whole, one partial  (pPp)       1,2,3
   1360    // - one partial, two whole               (pPP)       1,2,2
   1361    // - two whole, one partial               (PPp)       2,2,3
   1362    // - three whole                          (PPP)       2,2,2
   1363    //
   1364    // * one partial, N-2 whole, one partial  (pP...Pp)   1,2...2,3
   1365    // - one partial, N-1 whole               (pP...PP)   1,2...2,2
   1366    // - N-1 whole, one partial               (PP...Pp)   2,2...2,3
   1367    // - N whole                              (PP...PP)   2,2...2,2
   1368 
   1369    // Break up total length (lenT) into two parts:  length in the first
   1370    // sec-map (lenA), and the rest (lenB);   lenT == lenA + lenB.
   1371    aNext = start_of_this_sm(a) + SM_SIZE;
   1372    len_to_next_secmap = aNext - a;
   1373    if ( lenT <= len_to_next_secmap ) {
   1374       // Range entirely within one sec-map.  Covers almost all cases.
   1375       PROF_EVENT(151, "set_address_range_perms-single-secmap");
   1376       lenA = lenT;
   1377       lenB = 0;
   1378    } else if (is_start_of_sm(a)) {
   1379       // Range spans at least one whole sec-map, and starts at the beginning
   1380       // of a sec-map; skip to Part 2.
   1381       PROF_EVENT(152, "set_address_range_perms-startof-secmap");
   1382       lenA = 0;
   1383       lenB = lenT;
   1384       goto part2;
   1385    } else {
   1386       // Range spans two or more sec-maps, first one is partial.
   1387       PROF_EVENT(153, "set_address_range_perms-multiple-secmaps");
   1388       lenA = len_to_next_secmap;
   1389       lenB = lenT - lenA;
   1390    }
   1391 
   1392    //------------------------------------------------------------------------
   1393    // Part 1: Deal with the first sec-map.  Most of the time the range will be
   1394    // entirely within a sec-map and this part alone will suffice.  Also,
   1395    // doing it this way lets us avoid repeatedly testing for the crossing of
   1396    // a sec-map boundary within these loops.
   1397    //------------------------------------------------------------------------
   1398 
   1399    // If it's distinguished, make it undistinguished if necessary.
   1400    sm_ptr = get_secmap_ptr(a);
   1401    if (is_distinguished_sm(*sm_ptr)) {
   1402       if (*sm_ptr == example_dsm) {
   1403          // Sec-map already has the V+A bits that we want, so skip.
   1404          PROF_EVENT(154, "set_address_range_perms-dist-sm1-quick");
   1405          a    = aNext;
   1406          lenA = 0;
   1407       } else {
   1408          PROF_EVENT(155, "set_address_range_perms-dist-sm1");
   1409          *sm_ptr = copy_for_writing(*sm_ptr);
   1410       }
   1411    }
   1412    sm = *sm_ptr;
   1413 
   1414    // 1 byte steps
   1415    while (True) {
   1416       if (VG_IS_8_ALIGNED(a)) break;
   1417       if (lenA < 1)           break;
   1418       PROF_EVENT(156, "set_address_range_perms-loop1a");
   1419       sm_off = SM_OFF(a);
   1420       insert_vabits2_into_vabits8( a, vabits2, &(sm->vabits8[sm_off]) );
   1421       a    += 1;
   1422       lenA -= 1;
   1423    }
   1424    // 8-aligned, 8 byte steps
   1425    while (True) {
   1426       if (lenA < 8) break;
   1427       PROF_EVENT(157, "set_address_range_perms-loop8a");
   1428       sm_off16 = SM_OFF_16(a);
   1429       ((UShort*)(sm->vabits8))[sm_off16] = vabits16;
   1430       a    += 8;
   1431       lenA -= 8;
   1432    }
   1433    // 1 byte steps
   1434    while (True) {
   1435       if (lenA < 1) break;
   1436       PROF_EVENT(158, "set_address_range_perms-loop1b");
   1437       sm_off = SM_OFF(a);
   1438       insert_vabits2_into_vabits8( a, vabits2, &(sm->vabits8[sm_off]) );
   1439       a    += 1;
   1440       lenA -= 1;
   1441    }
   1442 
   1443    // We've finished the first sec-map.  Is that it?
   1444    if (lenB == 0)
   1445       return;
   1446 
   1447    //------------------------------------------------------------------------
   1448    // Part 2: Fast-set entire sec-maps at a time.
   1449    //------------------------------------------------------------------------
   1450   part2:
   1451    // 64KB-aligned, 64KB steps.
   1452    // Nb: we can reach here with lenB < SM_SIZE
   1453    tl_assert(0 == lenA);
   1454    while (True) {
   1455       if (lenB < SM_SIZE) break;
   1456       tl_assert(is_start_of_sm(a));
   1457       PROF_EVENT(159, "set_address_range_perms-loop64K");
   1458       sm_ptr = get_secmap_ptr(a);
   1459       if (!is_distinguished_sm(*sm_ptr)) {
   1460          PROF_EVENT(160, "set_address_range_perms-loop64K-free-dist-sm");
   1461          // Free the non-distinguished sec-map that we're replacing.  This
   1462          // case happens moderately often, enough to be worthwhile.
   1463          VG_(am_munmap_valgrind)((Addr)*sm_ptr, sizeof(SecMap));
   1464       }
   1465       update_SM_counts(*sm_ptr, example_dsm);
   1466       // Make the sec-map entry point to the example DSM
   1467       *sm_ptr = example_dsm;
   1468       lenB -= SM_SIZE;
   1469       a    += SM_SIZE;
   1470    }
   1471 
   1472    // We've finished the whole sec-maps.  Is that it?
   1473    if (lenB == 0)
   1474       return;
   1475 
   1476    //------------------------------------------------------------------------
   1477    // Part 3: Finish off the final partial sec-map, if necessary.
   1478    //------------------------------------------------------------------------
   1479 
   1480    tl_assert(is_start_of_sm(a) && lenB < SM_SIZE);
   1481 
   1482    // If it's distinguished, make it undistinguished if necessary.
   1483    sm_ptr = get_secmap_ptr(a);
   1484    if (is_distinguished_sm(*sm_ptr)) {
   1485       if (*sm_ptr == example_dsm) {
   1486          // Sec-map already has the V+A bits that we want, so stop.
   1487          PROF_EVENT(161, "set_address_range_perms-dist-sm2-quick");
   1488          return;
   1489       } else {
   1490          PROF_EVENT(162, "set_address_range_perms-dist-sm2");
   1491          *sm_ptr = copy_for_writing(*sm_ptr);
   1492       }
   1493    }
   1494    sm = *sm_ptr;
   1495 
   1496    // 8-aligned, 8 byte steps
   1497    while (True) {
   1498       if (lenB < 8) break;
   1499       PROF_EVENT(163, "set_address_range_perms-loop8b");
   1500       sm_off16 = SM_OFF_16(a);
   1501       ((UShort*)(sm->vabits8))[sm_off16] = vabits16;
   1502       a    += 8;
   1503       lenB -= 8;
   1504    }
   1505    // 1 byte steps
   1506    while (True) {
   1507       if (lenB < 1) return;
   1508       PROF_EVENT(164, "set_address_range_perms-loop1c");
   1509       sm_off = SM_OFF(a);
   1510       insert_vabits2_into_vabits8( a, vabits2, &(sm->vabits8[sm_off]) );
   1511       a    += 1;
   1512       lenB -= 1;
   1513    }
   1514 }
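
/* Illustrative sketch, not part of the tool: the 8-byte fast loops above
   work because the VA_BITS16_* codes are just the corresponding 2-bit
   VA_BITS2_* code replicated eight times, so a single UShort store covers
   the state of 8 client bytes at once.  A hypothetical helper rebuilding
   such a pattern from a 2-bit code: */
#if 0
static UWord example_replicate_vabits2 ( UWord vabits2 )
{
   UWord vabits16 = 0;
   UWord i;
   for (i = 0; i < 8; i++)
      vabits16 |= (vabits2 & 0x3) << (2 * i);
   return vabits16;  /* e.g. turns VA_BITS2_UNDEFINED into VA_BITS16_UNDEFINED */
}
#endif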
   1515 
   1516 
   1517 /* --- Set permissions for arbitrary address ranges --- */
   1518 
   1519 void MC_(make_mem_noaccess) ( Addr a, SizeT len )
   1520 {
   1521    PROF_EVENT(40, "MC_(make_mem_noaccess)");
   1522    DEBUG("MC_(make_mem_noaccess)(%p, %lu)\n", a, len);
   1523    set_address_range_perms ( a, len, VA_BITS16_NOACCESS, SM_DIST_NOACCESS );
   1524    if (UNLIKELY( MC_(clo_mc_level) == 3 ))
   1525       ocache_sarp_Clear_Origins ( a, len );
   1526 }
   1527 
   1528 static void make_mem_undefined ( Addr a, SizeT len )
   1529 {
   1530    PROF_EVENT(41, "make_mem_undefined");
   1531    DEBUG("make_mem_undefined(%p, %lu)\n", a, len);
   1532    set_address_range_perms ( a, len, VA_BITS16_UNDEFINED, SM_DIST_UNDEFINED );
   1533 }
   1534 
   1535 void MC_(make_mem_undefined_w_otag) ( Addr a, SizeT len, UInt otag )
   1536 {
   1537    PROF_EVENT(41, "MC_(make_mem_undefined)");
   1538    DEBUG("MC_(make_mem_undefined)(%p, %lu)\n", a, len);
   1539    set_address_range_perms ( a, len, VA_BITS16_UNDEFINED, SM_DIST_UNDEFINED );
   1540    if (UNLIKELY( MC_(clo_mc_level) == 3 ))
   1541       ocache_sarp_Set_Origins ( a, len, otag );
   1542 }
   1543 
   1544 static
   1545 void make_mem_undefined_w_tid_and_okind ( Addr a, SizeT len,
   1546                                           ThreadId tid, UInt okind )
   1547 {
   1548    UInt        ecu;
   1549    ExeContext* here;
   1550    /* VG_(record_ExeContext) checks for validity of tid, and asserts
   1551       if it is invalid.  So no need to do it here. */
   1552    tl_assert(okind <= 3);
   1553    here = VG_(record_ExeContext)( tid, 0/*first_ip_delta*/ );
   1554    tl_assert(here);
   1555    ecu = VG_(get_ECU_from_ExeContext)(here);
   1556    tl_assert(VG_(is_plausible_ECU)(ecu));
   1557    MC_(make_mem_undefined_w_otag) ( a, len, ecu | okind );
   1558 }
   1559 
   1560 static
   1561 void make_mem_undefined_w_tid ( Addr a, SizeT len, ThreadId tid ) {
   1562    make_mem_undefined_w_tid_and_okind ( a, len, tid, MC_OKIND_UNKNOWN );
   1563 }
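
/* Illustrative sketch, not part of the tool: the otag handed to
   MC_(make_mem_undefined_w_otag) above is just the 30-bit ECU with one of
   the 2-bit MC_OKIND_* codes OR'd into its low bits, so the ECU can be
   recovered again by masking those two bits off. */
#if 0
static UInt example_otag_roundtrip ( UInt ecu )
{
   UInt otag = ecu | MC_OKIND_UNKNOWN;   /* as in the functions above  */
   UInt back = otag & ~3u;               /* strip the 2-bit kind field */
   tl_assert(back == ecu);               /* holds for any valid ECU    */
   return otag;
}
#endif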
   1564 
   1565 
   1566 void MC_(make_mem_defined) ( Addr a, SizeT len )
   1567 {
   1568    PROF_EVENT(42, "MC_(make_mem_defined)");
   1569    DEBUG("MC_(make_mem_defined)(%p, %lu)\n", a, len);
   1570    set_address_range_perms ( a, len, VA_BITS16_DEFINED, SM_DIST_DEFINED );
   1571    if (UNLIKELY( MC_(clo_mc_level) == 3 ))
   1572       ocache_sarp_Clear_Origins ( a, len );
   1573 }
   1574 
   1575 /* For each byte in [a,a+len), if the byte is addressable, make it be
   1576    defined, but if it isn't addressable, leave it alone.  In other
   1577    words, a version of MC_(make_mem_defined) that doesn't mess with
   1578    addressability.  Low-performance implementation. */
   1579 static void make_mem_defined_if_addressable ( Addr a, SizeT len )
   1580 {
   1581    SizeT i;
   1582    UChar vabits2;
   1583    DEBUG("make_mem_defined_if_addressable(%p, %llu)\n", a, (ULong)len);
   1584    for (i = 0; i < len; i++) {
   1585       vabits2 = get_vabits2( a+i );
   1586       if (LIKELY(VA_BITS2_NOACCESS != vabits2)) {
   1587          set_vabits2(a+i, VA_BITS2_DEFINED);
   1588          if (UNLIKELY(MC_(clo_mc_level) >= 3)) {
   1589             MC_(helperc_b_store1)( a+i, 0 ); /* clear the origin tag */
   1590          }
   1591       }
   1592    }
   1593 }
   1594 
   1595 /* Similarly (needed for mprotect handling ..) */
   1596 static void make_mem_defined_if_noaccess ( Addr a, SizeT len )
   1597 {
   1598    SizeT i;
   1599    UChar vabits2;
   1600    DEBUG("make_mem_defined_if_noaccess(%p, %llu)\n", a, (ULong)len);
   1601    for (i = 0; i < len; i++) {
   1602       vabits2 = get_vabits2( a+i );
   1603       if (LIKELY(VA_BITS2_NOACCESS == vabits2)) {
   1604          set_vabits2(a+i, VA_BITS2_DEFINED);
   1605          if (UNLIKELY(MC_(clo_mc_level) >= 3)) {
   1606             MC_(helperc_b_store1)( a+i, 0 ); /* clear the origin tag */
   1607          }
   1608       }
   1609    }
   1610 }
   1611 
   1612 /* --- Block-copy permissions (needed for implementing realloc() and
   1613        sys_mremap). --- */
   1614 
   1615 void MC_(copy_address_range_state) ( Addr src, Addr dst, SizeT len )
   1616 {
   1617    SizeT i, j;
   1618    UChar vabits2, vabits8;
   1619    Bool  aligned, nooverlap;
   1620 
   1621    DEBUG("MC_(copy_address_range_state)\n");
   1622    PROF_EVENT(50, "MC_(copy_address_range_state)");
   1623 
   1624    if (len == 0 || src == dst)
   1625       return;
   1626 
   1627    aligned   = VG_IS_4_ALIGNED(src) && VG_IS_4_ALIGNED(dst);
   1628    nooverlap = src+len <= dst || dst+len <= src;
   1629 
   1630    if (nooverlap && aligned) {
   1631 
   1632       /* Vectorised fast case, when no overlap and suitably aligned */
   1633       /* vector loop */
   1634       i = 0;
   1635       while (len >= 4) {
   1636          vabits8 = get_vabits8_for_aligned_word32( src+i );
   1637          set_vabits8_for_aligned_word32( dst+i, vabits8 );
   1638          if (LIKELY(VA_BITS8_DEFINED == vabits8
   1639                             || VA_BITS8_UNDEFINED == vabits8
   1640                             || VA_BITS8_NOACCESS == vabits8)) {
   1641             /* do nothing */
   1642          } else {
   1643             /* have to copy secondary map info */
   1644             if (VA_BITS2_PARTDEFINED == get_vabits2( src+i+0 ))
   1645                set_sec_vbits8( dst+i+0, get_sec_vbits8( src+i+0 ) );
   1646             if (VA_BITS2_PARTDEFINED == get_vabits2( src+i+1 ))
   1647                set_sec_vbits8( dst+i+1, get_sec_vbits8( src+i+1 ) );
   1648             if (VA_BITS2_PARTDEFINED == get_vabits2( src+i+2 ))
   1649                set_sec_vbits8( dst+i+2, get_sec_vbits8( src+i+2 ) );
   1650             if (VA_BITS2_PARTDEFINED == get_vabits2( src+i+3 ))
   1651                set_sec_vbits8( dst+i+3, get_sec_vbits8( src+i+3 ) );
   1652          }
   1653          i += 4;
   1654          len -= 4;
   1655       }
   1656       /* fixup loop */
   1657       while (len >= 1) {
   1658          vabits2 = get_vabits2( src+i );
   1659          set_vabits2( dst+i, vabits2 );
   1660          if (VA_BITS2_PARTDEFINED == vabits2) {
   1661             set_sec_vbits8( dst+i, get_sec_vbits8( src+i ) );
   1662          }
   1663          i++;
   1664          len--;
   1665       }
   1666 
   1667    } else {
   1668 
   1669       /* We have to do things the slow way */
   1670       if (src < dst) {
   1671          for (i = 0, j = len-1; i < len; i++, j--) {
   1672             PROF_EVENT(51, "MC_(copy_address_range_state)(loop)");
   1673             vabits2 = get_vabits2( src+j );
   1674             set_vabits2( dst+j, vabits2 );
   1675             if (VA_BITS2_PARTDEFINED == vabits2) {
   1676                set_sec_vbits8( dst+j, get_sec_vbits8( src+j ) );
   1677             }
   1678          }
   1679       }
   1680 
   1681       if (src > dst) {
   1682          for (i = 0; i < len; i++) {
   1683             PROF_EVENT(52, "MC_(copy_address_range_state)(loop)");
   1684             vabits2 = get_vabits2( src+i );
   1685             set_vabits2( dst+i, vabits2 );
   1686             if (VA_BITS2_PARTDEFINED == vabits2) {
   1687                set_sec_vbits8( dst+i, get_sec_vbits8( src+i ) );
   1688             }
   1689          }
   1690       }
   1691    }
   1692 
   1693 }
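
/* Illustrative sketch, not part of the tool: the kind of sequence a
   realloc-style handler might perform with the helper above -- copy the
   V+A state of the old block onto the new one, then retire the old block.
   The function name and parameters here are hypothetical. */
#if 0
static void example_move_block_shadow ( Addr p_old, Addr p_new, SizeT szB )
{
   MC_(copy_address_range_state)( p_old, p_new, szB );
   MC_(make_mem_noaccess)( p_old, szB );
}
#endif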
   1694 
   1695 
   1696 /*------------------------------------------------------------*/
   1697 /*--- Origin tracking stuff - cache basics                 ---*/
   1698 /*------------------------------------------------------------*/
   1699 
   1700 /* AN OVERVIEW OF THE ORIGIN TRACKING IMPLEMENTATION
   1701    ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
   1702 
   1703    Note that this implementation draws inspiration from the "origin
   1704    tracking by value piggybacking" scheme described in "Tracking Bad
   1705    Apples: Reporting the Origin of Null and Undefined Value Errors"
   1706    (Michael Bond, Nicholas Nethercote, Stephen Kent, Samuel Guyer,
   1707    Kathryn McKinley, OOPSLA07, Montreal, Oct 2007) but in fact it is
   1708    implemented completely differently.
   1709 
   1710    Origin tags and ECUs -- about the shadow values
   1711    ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
   1712 
   1713    This implementation tracks the defining point of all uninitialised
   1714    values using so called "origin tags", which are 32-bit integers,
   1715    rather than using the values themselves to encode the origins.  The
   1716    latter, so-called "value piggybacking", is what the OOPSLA07 paper
   1717    describes.
   1718 
   1719    Origin tags, as tracked by the machinery below, are 32-bit unsigned
   1720    ints (UInts), regardless of the machine's word size.  Each tag
   1721    comprises an upper 30-bit ECU field and a lower 2-bit
   1722    'kind' field.  The ECU field is a number given out by m_execontext
   1723    and has a 1-1 mapping with ExeContext*s.  An ECU can be used
   1724    directly as an origin tag (otag), but in fact we want to put
   1725    additional information into the 'kind' field to indicate roughly where the
   1726    tag came from.  This helps print more understandable error messages
   1727    for the user -- it has no other purpose.  In summary:
   1728 
   1729    * Both ECUs and origin tags are represented as 32-bit words
   1730 
   1731    * m_execontext and the core-tool interface deal purely in ECUs.
   1732      They have no knowledge of origin tags - that is a purely
   1733      Memcheck-internal matter.
   1734 
   1735    * all valid ECUs have the lowest 2 bits zero and at least
   1736      one of the upper 30 bits nonzero (see VG_(is_plausible_ECU))
   1737 
   1738    * to convert from an ECU to an otag, OR in one of the MC_OKIND_
   1739      constants defined in mc_include.h.
   1740 
   1741    * to convert an otag back to an ECU, AND it with ~3
   1742 
   1743    One important fact is that no valid otag is zero.  A zero otag is
   1744    used by the implementation to indicate "no origin", which could
   1745    mean that either the value is defined, or it is undefined but the
   1746    implementation somehow managed to lose the origin.
   1747 
   1748    The ECU used for memory created by malloc etc is derived from the
   1749    stack trace at the time the malloc etc happens.  This means the
   1750    mechanism can show the exact allocation point for heap-created
   1751    uninitialised values.
   1752 
   1753    In contrast, it is simply too expensive to create a complete
   1754    backtrace for each stack allocation.  Therefore we merely use a
   1755    depth-1 backtrace for stack allocations, which can be done once at
   1756    translation time, rather than N times at run time.  The result of
   1757    this is that, for stack created uninitialised values, Memcheck can
   1758    only show the allocating function, and not what called it.
   1759    Furthermore, compilers tend to move the stack pointer just once at
   1760    the start of the function, to allocate all locals, and so in fact
   1761    the stack origin almost always simply points to the opening brace
   1762    of the function.  Net result is, for stack origins, the mechanism
   1763    can tell you in which function the undefined value was created, but
   1764    that's all.  Users will need to carefully check all locals in the
   1765    specified function.
   1766 
   1767    Shadowing registers and memory
   1768    ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
   1769 
   1770    Memory is shadowed using a two level cache structure (ocacheL1 and
   1771    ocacheL2).  Memory references are first directed to ocacheL1.  This
   1772    is a traditional 2-way set associative cache with 32-byte lines and
   1773    approximate LRU replacement within each set.
   1774 
   1775    A naive implementation would require storing one 32 bit otag for
   1776    each byte of memory covered, a 4:1 space overhead.  Instead, there
   1777    is one otag for every 4 bytes of memory covered, plus a 4-bit mask
   1778    that shows which of the 4 bytes have that shadow value and which
   1779    have a shadow value of zero (indicating no origin).  Hence a lot of
   1780    space is saved, but the cost is that only one different origin per
   1781    4 bytes of address space can be represented.  This is a source of
   1782    imprecision, but how much of a problem it really is remains to be
   1783    seen.
   1784 
   1785    A cache line that contains all zeroes ("no origins") contains no
   1786    useful information, and can be ejected from the L1 cache "for
   1787    free", in the sense that a read miss on the L1 causes a line of
   1788    zeroes to be installed.  However, ejecting a line containing
   1789    nonzeroes risks losing origin information permanently.  In order to
   1790    prevent such lossage, ejected nonzero lines are placed in a
   1791    secondary cache (ocacheL2), which is an OSet (AVL tree) of cache
   1792    lines.  This can grow arbitrarily large, and so should ensure that
   1793    Memcheck runs out of memory in preference to losing useful origin
   1794    info due to cache size limitations.
   1795 
   1796    Shadowing registers is a bit tricky, because the shadow values are
   1797    32 bits, regardless of the size of the register.  That gives a
   1798    problem for registers smaller than 32 bits.  The solution is to
   1799    find spaces in the guest state that are unused, and use those to
   1800    shadow guest state fragments smaller than 32 bits.  For example, on
   1801    ppc32/64, each vector register is 16 bytes long.  If 4 bytes of the
   1802    shadow are allocated for the register's otag, then there are still
   1803    12 bytes left over which could be used to shadow 3 other values.
   1804 
   1805    This implies there is some non-obvious mapping from guest state
   1806    (start,length) pairs to the relevant shadow offset (for the origin
   1807    tags).  And it is unfortunately guest-architecture specific.  The
   1808    mapping is contained in mc_machine.c, which is quite lengthy but
   1809    straightforward.
   1810 
   1811    Instrumenting the IR
   1812    ~~~~~~~~~~~~~~~~~~~~
   1813 
   1814    Instrumentation is largely straightforward, and done by the
   1815    functions schemeE and schemeS in mc_translate.c.  These generate
   1816    code for handling the origin tags of expressions (E) and statements
   1817    (S) respectively.  The rather strange names are a reference to the
   1818    "compilation schemes" shown in Simon Peyton Jones' book "The
   1819    Implementation of Functional Programming Languages" (Prentice Hall,
   1820    1987, see
   1821    http://research.microsoft.com/~simonpj/papers/slpj-book-1987/index.htm).
   1822 
   1823    schemeS merely arranges to move shadow values around the guest
   1824    state to track the incoming IR.  schemeE is largely trivial too.
   1825    The only significant point is how to compute the otag corresponding
   1826    to binary (or ternary, quaternary, etc) operator applications.  The
   1827    rule is simple: just take whichever value is larger (32-bit
   1828    unsigned max).  Constants get the special value zero.  Hence this
   1829    rule always propagates a nonzero (known) otag in preference to a
   1830    zero (unknown, or more likely, value-is-defined) tag, as we want.
   1831    If two different undefined values are inputs to a binary operator
   1832    application, then which is propagated is arbitrary, but that
   1833    doesn't matter, since the program is erroneous in using either of
   1834    the values, and so there's no point in attempting to propagate
   1835    both.  (The sketch just after this comment illustrates the max rule.)
   1836 
   1837    Since constants are abstracted to (otag) zero, much of the
   1838    instrumentation code can be folded out without difficulty by the
   1839    generic post-instrumentation IR cleanup pass, using these rules:
   1840    Max32U(0,x) -> x, Max32U(x,0) -> x, and Max32U(x,y) where x and y are
   1841    constants is evaluated at JIT time; the resulting dead code is then
   1842    removed.  In practice this causes surprisingly few Max32Us to
   1843    survive through to backend code generation.
   1844 
   1845    Integration with the V-bits machinery
   1846    ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
   1847 
   1848    This is again largely straightforward.  Mostly the otag and V bits
   1849    stuff are independent.  The only point of interaction is when the V
   1850    bits instrumenter creates a call to a helper function to report an
   1851    uninitialised value error -- in that case it must first use schemeE
   1852    to get hold of the origin tag expression for the value, and pass
   1853    that to the helper too.
   1854 
   1855    There is the usual stuff to do with setting address range
   1856    permissions.  When memory is painted undefined, we must also know
   1857    the origin tag to paint with, which involves some tedious plumbing,
   1858    particularly to do with the fast case stack handlers.  When memory
   1859    is painted defined or noaccess then the origin tags must be forced
   1860    to zero.
   1861 
   1862    One of the goals of the implementation was to ensure that the
   1863    non-origin tracking mode isn't slowed down at all.  To do this,
   1864    various functions to do with memory permissions setting (again,
   1865    mostly pertaining to the stack) are duplicated for the with- and
   1866    without-otag case.
   1867 
   1868    Dealing with stack redzones, and the NIA cache
   1869    ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
   1870 
   1871    This is one of the few non-obvious parts of the implementation.
   1872 
   1873    Some ABIs (amd64-ELF, ppc64-ELF, ppc32/64-XCOFF) define a small
   1874    reserved area below the stack pointer, that can be used as scratch
   1875    space by compiler generated code for functions.  In the Memcheck
   1876    sources this is referred to as the "stack redzone".  The important
   1877    thing here is that such redzones are considered volatile across
   1878    function calls and returns.  So Memcheck takes care to mark them as
   1879    undefined for each call and return, on the afflicted platforms.
   1880    Past experience shows this is essential in order to get reliable
   1881    messages about uninitialised values that come from the stack.
   1882 
   1883    So the question is, when we paint a redzone undefined, what origin
   1884    tag should we use for it?  Consider a function f() calling g().  If
   1885    we paint the redzone using an otag derived from the ExeContext of
   1886    the CALL/BL instruction in f, then any errors in g causing it to
   1887    use uninitialised values that happen to lie in the redzone, will be
   1888    reported as having their origin in f.  Which is highly confusing.
   1889 
   1890    The same applies for returns: if, on a return, we paint the redzone
   1891    using an origin tag derived from the ExeContext of the RET/BLR
   1892    instruction in g, then any later errors in f causing it to use
   1893    uninitialised values in the redzone, will be reported as having
   1894    their origin in g.  Which is just as confusing.
   1895 
   1896    To do it right, in both cases we need to use an origin tag which
   1897    pertains to the instruction which dynamically follows the CALL/BL
   1898    or RET/BLR.  In short, one derived from the NIA - the "next
   1899    instruction address".
   1900 
   1901    To make this work, Memcheck's redzone-painting helper,
   1902    MC_(helperc_MAKE_STACK_UNINIT), now takes a third argument, the
   1903    NIA.  It converts the NIA to a 1-element ExeContext, and uses that
   1904    ExeContext's ECU as the basis for the otag used to paint the
   1905    redzone.  The expensive part of this is converting an NIA into an
   1906    ECU, since this happens once for every call and every return.  So
   1907    we use a simple 511-line, 2-way set associative cache
   1908    (nia_to_ecu_cache) to cache the mappings, and that knocks most of
   1909    the cost out.
   1910 
   1911    Further background comments
   1912    ~~~~~~~~~~~~~~~~~~~~~~~~~~~
   1913 
   1914    > Question: why is otag a UInt?  Wouldn't a UWord be better?  Isn't
   1915    > it really just the address of the relevant ExeContext?
   1916 
   1917    Well, it's not the address, but a value which has a 1-1 mapping
   1918    with ExeContexts, and is guaranteed not to be zero, since zero
   1919    denotes (to memcheck) "unknown origin or defined value".  So these
   1920    UInts are just numbers starting at 4 and incrementing by 4; each
   1921    ExeContext is given a number when it is created.  (*** NOTE this
   1922    confuses otags and ECUs; see comments above ***).
   1923 
   1924    Making these otags 32-bit regardless of the machine's word size
   1925    makes the 64-bit implementation easier (next para).  And it doesn't
   1926    really limit us in any way, since for the tags to overflow would
   1927    require that the program somehow caused 2^30-1 different
   1928    ExeContexts to be created, in which case it is probably in deep
   1929    trouble.  Not to mention V will have soaked up many tens of
   1930    gigabytes of memory merely to store them all.
   1931 
   1932    So having 64-bit origins doesn't really buy you anything, and has
   1933    the following downsides:
   1934 
   1935    Suppose that instead, an otag is a UWord.  This would mean that, on
   1936    a 64-bit target,
   1937 
   1938    1. It becomes hard to shadow any element of guest state which is
   1939       smaller than 8 bytes.  To do so means you'd need to find some
   1940       8-byte-sized hole in the guest state which you don't want to
   1941       shadow, and use that instead to hold the otag.  On ppc64, the
   1942       condition code register(s) are split into 20 UChar sized pieces,
   1943       all of which need to be tracked (guest_XER_SO .. guest_CR7_0)
   1944       and so that would entail finding 160 bytes somewhere else in the
   1945       guest state.
   1946 
   1947       Even on x86, I want to track origins for %AH .. %DH (bits 15:8
   1948       of %EAX .. %EDX) that are separate from %AL .. %DL (bits 7:0 of
   1949       same) and so I had to look for 4 untracked otag-sized areas in
   1950       the guest state to make that possible.
   1951 
   1952       The same problem exists of course when origin tags are only 32
   1953       bits, but it's less extreme.
   1954 
   1955    2. (More compelling) it doubles the size of the origin shadow
   1956       memory.  Given that the shadow memory is organised as a fixed
   1957       size cache, and that accuracy of tracking is limited by origins
   1958       falling out the cache due to space conflicts, this isn't good.
   1959 
   1960    > Another question: is the origin tracking perfect, or are there
   1961    > cases where it fails to determine an origin?
   1962 
   1963    It is imperfect for at least the following reasons, and
   1964    probably more:
   1965 
   1966    * Insufficient capacity in the origin cache.  When a line is
   1967      evicted from the cache it is gone forever, and so subsequent
   1968      queries for the line produce zero, indicating no origin
   1969      information.  Interestingly, a line containing all zeroes can be
   1970      evicted "free" from the cache, since it contains no useful
   1971      information, so there is scope perhaps for some cleverer cache
   1972      management schemes.  (*** NOTE, with the introduction of the
   1973      second level origin tag cache, ocacheL2, this is no longer a
   1974      problem. ***)
   1975 
   1976    * The origin cache only stores one otag per 32-bits of address
   1977      space, plus 4 bits indicating which of the 4 bytes has that tag
   1978      and which are considered defined.  The result is that if two
   1979      undefined bytes in the same word are stored in memory, the first
   1980      stored byte's origin will be lost and replaced by the origin for
   1981      the second byte.
   1982 
   1983    * Nonzero origin tags for defined values.  Consider a binary
   1984      operator application op(x,y).  Suppose y is undefined (and so has
   1985      a valid nonzero origin tag), and x is defined, but erroneously
   1986      has a nonzero origin tag (defined values should have tag zero).
   1987      If the erroneous tag has a numeric value greater than y's tag,
   1988      then the rule for propagating origin tags through binary
   1989      operations, which is simply to take the unsigned max of the two
   1990      tags, will erroneously propagate x's tag rather than y's.
   1991 
   1992    * Some obscure uses of x86/amd64 byte registers can cause lossage
   1993      or confusion of origins.  %AH .. %DH are treated as different
   1994      from, and unrelated to, their parent registers, %EAX .. %EDX.
   1995      So some weird sequences like
   1996 
   1997         movb undefined-value, %AH
   1998         movb defined-value, %AL
   1999         .. use %AX or %EAX ..
   2000 
   2001      will cause the origin attributed to %AH to be ignored, since %AL,
   2002      %AX, %EAX are treated as the same register, and %AH as a
   2003      completely separate one.
   2004 
   2005    But having said all that, it actually seems to work fairly well in
   2006    practice.
   2007 */
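
/* Illustrative sketch, not part of the tool: the propagation rule for
   origin tags through binary (and wider) operations described above is
   simply an unsigned 32-bit max, with constants and defined values
   abstracted to tag zero. */
#if 0
static UInt example_propagate_otag ( UInt otag_x, UInt otag_y )
{
   /* A nonzero (known-undefined) origin always beats "no origin" (0);
      between two nonzero tags the choice is arbitrary, so take the max. */
   return otag_x > otag_y ? otag_x : otag_y;
}
#endif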
   2008 
   2009 static UWord stats_ocacheL1_find           = 0;
   2010 static UWord stats_ocacheL1_found_at_1     = 0;
   2011 static UWord stats_ocacheL1_found_at_N     = 0;
   2012 static UWord stats_ocacheL1_misses         = 0;
   2013 static UWord stats_ocacheL1_lossage        = 0;
   2014 static UWord stats_ocacheL1_movefwds       = 0;
   2015 
   2016 static UWord stats__ocacheL2_refs          = 0;
   2017 static UWord stats__ocacheL2_misses        = 0;
   2018 static UWord stats__ocacheL2_n_nodes_max   = 0;
   2019 
   2020 /* Cache of 32-bit values, one every 32 bits of address space */
   2021 
   2022 #define OC_BITS_PER_LINE 5
   2023 #define OC_W32S_PER_LINE (1 << (OC_BITS_PER_LINE - 2))
   2024 
   2025 static INLINE UWord oc_line_offset ( Addr a ) {
   2026    return (a >> 2) & (OC_W32S_PER_LINE - 1);
   2027 }
   2028 static INLINE Bool is_valid_oc_tag ( Addr tag ) {
   2029    return 0 == (tag & ((1 << OC_BITS_PER_LINE) - 1));
   2030 }
   2031 
   2032 #define OC_LINES_PER_SET 2
   2033 
   2034 #define OC_N_SET_BITS    20
   2035 #define OC_N_SETS        (1 << OC_N_SET_BITS)
   2036 
   2037 /* These settings give:
   2038    64 bit host: ocache:  100,663,296 sizeB    67,108,864 useful
   2039    32 bit host: ocache:   92,274,688 sizeB    67,108,864 useful
   2040 */
   2041 
   2042 #define OC_MOVE_FORWARDS_EVERY_BITS 7
   2043 
   2044 
   2045 typedef
   2046    struct {
   2047       Addr  tag;
   2048       UInt  w32[OC_W32S_PER_LINE];
   2049       UChar descr[OC_W32S_PER_LINE];
   2050    }
   2051    OCacheLine;
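
/* Illustrative sketch, not part of the tool: how an address maps onto
   this cache with the parameters above.  Each line covers 32 bytes
   (OC_BITS_PER_LINE == 5) and holds one otag per aligned 32-bit word
   (w32[]), plus a 4-bit mask per word (descr[]) saying which of its 4
   bytes actually carry that otag.  The sizes quoted in the comment above
   follow from this: on a 64-bit host sizeof(OCacheLine) is
   8 + 8*4 + 8 = 48 bytes, giving 2^20 sets * 2 lines * 48 = 100,663,296
   bytes in total, of which the w32[] payload (2^20 * 2 * 32 = 67,108,864
   bytes) is the useful part. */
#if 0
static void example_decompose_addr ( Addr a )
{
   UWord setno   = (a >> OC_BITS_PER_LINE) & (OC_N_SETS - 1);  /* which set       */
   Addr  tag     = a & ~(Addr)((1 << OC_BITS_PER_LINE) - 1);   /* which line      */
   UWord lineoff = oc_line_offset(a);                          /* which w32 slot  */
   UWord byteoff = a & 3;                                      /* which descr bit */
   (void)setno; (void)tag; (void)lineoff; (void)byteoff;
}
#endif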
   2052 
   2053 /* Classify and also sanity-check 'line'.  Return 'e' (empty) if not
   2054    in use, 'n' (nonzero) if it contains at least one valid origin tag,
   2055    and 'z' if all the represented tags are zero. */
   2056 static UChar classify_OCacheLine ( OCacheLine* line )
   2057 {
   2058    UWord i;
   2059    if (line->tag == 1/*invalid*/)
   2060       return 'e'; /* EMPTY */
   2061    tl_assert(is_valid_oc_tag(line->tag));
   2062    for (i = 0; i < OC_W32S_PER_LINE; i++) {
   2063       tl_assert(0 == ((~0xF) & line->descr[i]));
   2064       if (line->w32[i] > 0 && line->descr[i] > 0)
   2065          return 'n'; /* NONZERO - contains useful info */
   2066    }
   2067    return 'z'; /* ZERO - no useful info */
   2068 }
   2069 
   2070 typedef
   2071    struct {
   2072       OCacheLine line[OC_LINES_PER_SET];
   2073    }
   2074    OCacheSet;
   2075 
   2076 typedef
   2077    struct {
   2078       OCacheSet set[OC_N_SETS];
   2079    }
   2080    OCache;
   2081 
   2082 static OCache* ocacheL1 = NULL;
   2083 static UWord   ocacheL1_event_ctr = 0;
   2084 
   2085 static void init_ocacheL2 ( void ); /* fwds */
   2086 static void init_OCache ( void )
   2087 {
   2088    UWord line, set;
   2089    tl_assert(MC_(clo_mc_level) >= 3);
   2090    tl_assert(ocacheL1 == NULL);
   2091    ocacheL1 = VG_(am_shadow_alloc)(sizeof(OCache));
   2092    if (ocacheL1 == NULL) {
   2093       VG_(out_of_memory_NORETURN)( "memcheck:allocating ocacheL1",
   2094                                    sizeof(OCache) );
   2095    }
   2096    tl_assert(ocacheL1 != NULL);
   2097    for (set = 0; set < OC_N_SETS; set++) {
   2098       for (line = 0; line < OC_LINES_PER_SET; line++) {
   2099          ocacheL1->set[set].line[line].tag = 1/*invalid*/;
   2100       }
   2101    }
   2102    init_ocacheL2();
   2103 }
   2104 
   2105 static void moveLineForwards ( OCacheSet* set, UWord lineno )
   2106 {
   2107    OCacheLine tmp;
   2108    stats_ocacheL1_movefwds++;
   2109    tl_assert(lineno > 0 && lineno < OC_LINES_PER_SET);
   2110    tmp = set->line[lineno-1];
   2111    set->line[lineno-1] = set->line[lineno];
   2112    set->line[lineno] = tmp;
   2113 }
   2114 
   2115 static void zeroise_OCacheLine ( OCacheLine* line, Addr tag ) {
   2116    UWord i;
   2117    for (i = 0; i < OC_W32S_PER_LINE; i++) {
   2118       line->w32[i] = 0; /* NO ORIGIN */
   2119       line->descr[i] = 0; /* REALLY REALLY NO ORIGIN! */
   2120    }
   2121    line->tag = tag;
   2122 }
   2123 
   2124 //////////////////////////////////////////////////////////////
   2125 //// OCache backing store
   2126 
   2127 static OSet* ocacheL2 = NULL;
   2128 
   2129 static void* ocacheL2_malloc ( HChar* cc, SizeT szB ) {
   2130    return VG_(malloc)(cc, szB);
   2131 }
   2132 static void ocacheL2_free ( void* v ) {
   2133    VG_(free)( v );
   2134 }
   2135 
   2136 /* Stats: # nodes currently in tree */
   2137 static UWord stats__ocacheL2_n_nodes = 0;
   2138 
   2139 static void init_ocacheL2 ( void )
   2140 {
   2141    tl_assert(!ocacheL2);
   2142    tl_assert(sizeof(Word) == sizeof(Addr)); /* since OCacheLine.tag :: Addr */
   2143    tl_assert(0 == offsetof(OCacheLine,tag));
   2144    ocacheL2
   2145       = VG_(OSetGen_Create)( offsetof(OCacheLine,tag),
   2146                              NULL, /* fast cmp */
   2147                              ocacheL2_malloc, "mc.ioL2", ocacheL2_free );
   2148    tl_assert(ocacheL2);
   2149    stats__ocacheL2_n_nodes = 0;
   2150 }
   2151 
   2152 /* Find line with the given tag in the tree, or NULL if not found. */
   2153 static OCacheLine* ocacheL2_find_tag ( Addr tag )
   2154 {
   2155    OCacheLine* line;
   2156    tl_assert(is_valid_oc_tag(tag));
   2157    stats__ocacheL2_refs++;
   2158    line = VG_(OSetGen_Lookup)( ocacheL2, &tag );
   2159    return line;
   2160 }
   2161 
   2162 /* Delete the line with the given tag from the tree, if it is present, and
   2163    free up the associated memory. */
   2164 static void ocacheL2_del_tag ( Addr tag )
   2165 {
   2166    OCacheLine* line;
   2167    tl_assert(is_valid_oc_tag(tag));
   2168    stats__ocacheL2_refs++;
   2169    line = VG_(OSetGen_Remove)( ocacheL2, &tag );
   2170    if (line) {
   2171       VG_(OSetGen_FreeNode)(ocacheL2, line);
   2172       tl_assert(stats__ocacheL2_n_nodes > 0);
   2173       stats__ocacheL2_n_nodes--;
   2174    }
   2175 }
   2176 
   2177 /* Add a copy of the given line to the tree.  It must not already be
   2178    present. */
   2179 static void ocacheL2_add_line ( OCacheLine* line )
   2180 {
   2181    OCacheLine* copy;
   2182    tl_assert(is_valid_oc_tag(line->tag));
   2183    copy = VG_(OSetGen_AllocNode)( ocacheL2, sizeof(OCacheLine) );
   2184    tl_assert(copy);
   2185    *copy = *line;
   2186    stats__ocacheL2_refs++;
   2187    VG_(OSetGen_Insert)( ocacheL2, copy );
   2188    stats__ocacheL2_n_nodes++;
   2189    if (stats__ocacheL2_n_nodes > stats__ocacheL2_n_nodes_max)
   2190       stats__ocacheL2_n_nodes_max = stats__ocacheL2_n_nodes;
   2191 }
   2192 
   2193 ////
   2194 //////////////////////////////////////////////////////////////
   2195 
   2196 __attribute__((noinline))
   2197 static OCacheLine* find_OCacheLine_SLOW ( Addr a )
   2198 {
   2199    OCacheLine *victim, *inL2;
   2200    UChar c;
   2201    UWord line;
   2202    UWord setno   = (a >> OC_BITS_PER_LINE) & (OC_N_SETS - 1);
   2203    UWord tagmask = ~((1 << OC_BITS_PER_LINE) - 1);
   2204    UWord tag     = a & tagmask;
   2205    tl_assert(setno >= 0 && setno < OC_N_SETS);
   2206 
   2207    /* we already tried line == 0; skip therefore. */
   2208    for (line = 1; line < OC_LINES_PER_SET; line++) {
   2209       if (ocacheL1->set[setno].line[line].tag == tag) {
   2210          if (line == 1) {
   2211             stats_ocacheL1_found_at_1++;
   2212          } else {
   2213             stats_ocacheL1_found_at_N++;
   2214          }
   2215          if (UNLIKELY(0 == (ocacheL1_event_ctr++
   2216                             & ((1<<OC_MOVE_FORWARDS_EVERY_BITS)-1)))) {
   2217             moveLineForwards( &ocacheL1->set[setno], line );
   2218             line--;
   2219          }
   2220          return &ocacheL1->set[setno].line[line];
   2221       }
   2222    }
   2223 
   2224    /* A miss.  Use the last slot.  Implicitly this means we're
   2225       ejecting the line in the last slot. */
   2226    stats_ocacheL1_misses++;
   2227    tl_assert(line == OC_LINES_PER_SET);
   2228    line--;
   2229    tl_assert(line > 0);
   2230 
   2231    /* First, move the to-be-ejected line to the L2 cache. */
   2232    victim = &ocacheL1->set[setno].line[line];
   2233    c = classify_OCacheLine(victim);
   2234    switch (c) {
   2235       case 'e':
   2236          /* the line is empty (has invalid tag); ignore it. */
   2237          break;
   2238       case 'z':
   2239          /* line contains zeroes.  We must ensure the backing store is
   2240             updated accordingly, either by copying the line there
   2241             verbatim, or by ensuring it isn't present there.  We
   2242            choose the latter on the basis that it reduces the size of
   2243             the backing store. */
   2244          ocacheL2_del_tag( victim->tag );
   2245          break;
   2246       case 'n':
   2247          /* line contains at least one real, useful origin.  Copy it
   2248             to the backing store. */
   2249          stats_ocacheL1_lossage++;
   2250          inL2 = ocacheL2_find_tag( victim->tag );
   2251          if (inL2) {
   2252             *inL2 = *victim;
   2253          } else {
   2254             ocacheL2_add_line( victim );
   2255          }
   2256          break;
   2257       default:
   2258          tl_assert(0);
   2259    }
   2260 
   2261    /* Now we must reload the L1 cache from the backing tree, if
   2262       possible. */
   2263    tl_assert(tag != victim->tag); /* stay sane */
   2264    inL2 = ocacheL2_find_tag( tag );
   2265    if (inL2) {
   2266       /* We're in luck.  It's in the L2. */
   2267       ocacheL1->set[setno].line[line] = *inL2;
   2268    } else {
   2269       /* Missed at both levels of the cache hierarchy.  We have to
   2270          declare it as full of zeroes (unknown origins). */
   2271       stats__ocacheL2_misses++;
   2272       zeroise_OCacheLine( &ocacheL1->set[setno].line[line], tag );
   2273    }
   2274 
   2275    /* Move it one forwards */
   2276    moveLineForwards( &ocacheL1->set[setno], line );
   2277    line--;
   2278 
   2279    return &ocacheL1->set[setno].line[line];
   2280 }
   2281 
   2282 static INLINE OCacheLine* find_OCacheLine ( Addr a )
   2283 {
   2284    UWord setno   = (a >> OC_BITS_PER_LINE) & (OC_N_SETS - 1);
   2285    UWord tagmask = ~((1 << OC_BITS_PER_LINE) - 1);
   2286    UWord tag     = a & tagmask;
   2287 
   2288    stats_ocacheL1_find++;
   2289 
   2290    if (OC_ENABLE_ASSERTIONS) {
   2291       tl_assert(setno >= 0 && setno < OC_N_SETS);
   2292       tl_assert(0 == (tag & (4 * OC_W32S_PER_LINE - 1)));
   2293    }
   2294 
   2295    if (LIKELY(ocacheL1->set[setno].line[0].tag == tag)) {
   2296       return &ocacheL1->set[setno].line[0];
   2297    }
   2298 
   2299    return find_OCacheLine_SLOW( a );
   2300 }
   2301 
   2302 static INLINE void set_aligned_word64_Origin_to_undef ( Addr a, UInt otag )
   2303 {
   2304    //// BEGIN inlined, specialised version of MC_(helperc_b_store8)
   2305    //// Set the origins for a+0 .. a+7
   2306    { OCacheLine* line;
   2307      UWord lineoff = oc_line_offset(a);
   2308      if (OC_ENABLE_ASSERTIONS) {
   2309         tl_assert(lineoff >= 0
   2310                   && lineoff < OC_W32S_PER_LINE -1/*'cos 8-aligned*/);
   2311      }
   2312      line = find_OCacheLine( a );
   2313      line->descr[lineoff+0] = 0xF;
   2314      line->descr[lineoff+1] = 0xF;
   2315      line->w32[lineoff+0]   = otag;
   2316      line->w32[lineoff+1]   = otag;
   2317    }
   2318    //// END inlined, specialised version of MC_(helperc_b_store8)
   2319 }
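
/* Illustrative sketch, not part of the tool: reading an origin back out
   follows the same line/offset pattern as the store above -- find the
   line, then report the stored otag only if this byte's bit is set in the
   descr mask.  (The tool's real reader is MC_(helperc_b_load1); this is
   only an approximation of its core logic.) */
#if 0
static UInt example_load_origin_for_byte ( Addr a )
{
   OCacheLine* line    = find_OCacheLine( a );
   UWord       lineoff = oc_line_offset( a );
   UWord       byteoff = a & 3;
   if (line->descr[lineoff] & (1 << byteoff))
      return line->w32[lineoff];
   return 0;   /* no origin recorded for this byte */
}
#endif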
   2320 
   2321 
   2322 /*------------------------------------------------------------*/
   2323 /*--- Aligned fast case permission setters,                ---*/
   2324 /*--- for dealing with stacks                              ---*/
   2325 /*------------------------------------------------------------*/
   2326 
   2327 /*--------------------- 32-bit ---------------------*/
   2328 
   2329 /* Nb: by "aligned" here we mean 4-byte aligned */
   2330 
   2331 static INLINE void make_aligned_word32_undefined ( Addr a )
   2332 {
   2333    PROF_EVENT(300, "make_aligned_word32_undefined");
   2334 
   2335 #ifndef PERF_FAST_STACK2
   2336    make_mem_undefined(a, 4);
   2337 #else
   2338    {
   2339       UWord   sm_off;
   2340       SecMap* sm;
   2341 
   2342       if (UNLIKELY(a > MAX_PRIMARY_ADDRESS)) {
   2343          PROF_EVENT(301, "make_aligned_word32_undefined-slow1");
   2344          make_mem_undefined(a, 4);
   2345          return;
   2346       }
   2347 
   2348       sm                  = get_secmap_for_writing_low(a);
   2349       sm_off              = SM_OFF(a);
   2350       sm->vabits8[sm_off] = VA_BITS8_UNDEFINED;
   2351    }
   2352 #endif
   2353 }
   2354 
   2355 static INLINE
   2356 void make_aligned_word32_undefined_w_otag ( Addr a, UInt otag )
   2357 {
   2358    make_aligned_word32_undefined(a);
   2359    //// BEGIN inlined, specialised version of MC_(helperc_b_store4)
   2360    //// Set the origins for a+0 .. a+3
   2361    { OCacheLine* line;
   2362      UWord lineoff = oc_line_offset(a);
   2363      if (OC_ENABLE_ASSERTIONS) {
   2364         tl_assert(lineoff >= 0 && lineoff < OC_W32S_PER_LINE);
   2365      }
   2366      line = find_OCacheLine( a );
   2367      line->descr[lineoff] = 0xF;
   2368      line->w32[lineoff]   = otag;
   2369    }
   2370    //// END inlined, specialised version of MC_(helperc_b_store4)
   2371 }
   2372 
   2373 static INLINE
   2374 void make_aligned_word32_noaccess ( Addr a )
   2375 {
   2376    PROF_EVENT(310, "make_aligned_word32_noaccess");
   2377 
   2378 #ifndef PERF_FAST_STACK2
   2379    MC_(make_mem_noaccess)(a, 4);
   2380 #else
   2381    {
   2382       UWord   sm_off;
   2383       SecMap* sm;
   2384 
   2385       if (UNLIKELY(a > MAX_PRIMARY_ADDRESS)) {
   2386          PROF_EVENT(311, "make_aligned_word32_noaccess-slow1");
   2387          MC_(make_mem_noaccess)(a, 4);
   2388          return;
   2389       }
   2390 
   2391       sm                  = get_secmap_for_writing_low(a);
   2392       sm_off              = SM_OFF(a);
   2393       sm->vabits8[sm_off] = VA_BITS8_NOACCESS;
   2394 
   2395       //// BEGIN inlined, specialised version of MC_(helperc_b_store4)
   2396       //// Set the origins for a+0 .. a+3.
   2397       if (UNLIKELY( MC_(clo_mc_level) == 3 )) {
   2398          OCacheLine* line;
   2399          UWord lineoff = oc_line_offset(a);
   2400          if (OC_ENABLE_ASSERTIONS) {
   2401             tl_assert(lineoff >= 0 && lineoff < OC_W32S_PER_LINE);
   2402          }
   2403          line = find_OCacheLine( a );
   2404          line->descr[lineoff] = 0;
   2405       }
   2406       //// END inlined, specialised version of MC_(helperc_b_store4)
   2407    }
   2408 #endif
   2409 }
   2410 
   2411 /*--------------------- 64-bit ---------------------*/
   2412 
   2413 /* Nb: by "aligned" here we mean 8-byte aligned */
   2414 
   2415 static INLINE void make_aligned_word64_undefined ( Addr a )
   2416 {
   2417    PROF_EVENT(320, "make_aligned_word64_undefined");
   2418 
   2419 #ifndef PERF_FAST_STACK2
   2420    make_mem_undefined(a, 8);
   2421 #else
   2422    {
   2423       UWord   sm_off16;
   2424       SecMap* sm;
   2425 
   2426       if (UNLIKELY(a > MAX_PRIMARY_ADDRESS)) {
   2427          PROF_EVENT(321, "make_aligned_word64_undefined-slow1");
   2428          make_mem_undefined(a, 8);
   2429          return;
   2430       }
   2431 
   2432       sm       = get_secmap_for_writing_low(a);
   2433       sm_off16 = SM_OFF_16(a);
   2434       ((UShort*)(sm->vabits8))[sm_off16] = VA_BITS16_UNDEFINED;
   2435    }
   2436 #endif
   2437 }
   2438 
   2439 static INLINE
   2440 void make_aligned_word64_undefined_w_otag ( Addr a, UInt otag )
   2441 {
   2442    make_aligned_word64_undefined(a);
   2443    //// BEGIN inlined, specialised version of MC_(helperc_b_store8)
   2444    //// Set the origins for a+0 .. a+7
   2445    { OCacheLine* line;
   2446      UWord lineoff = oc_line_offset(a);
   2447      tl_assert(lineoff >= 0
   2448                && lineoff < OC_W32S_PER_LINE -1/*'cos 8-aligned*/);
   2449      line = find_OCacheLine( a );
   2450      line->descr[lineoff+0] = 0xF;
   2451      line->descr[lineoff+1] = 0xF;
   2452      line->w32[lineoff+0]   = otag;
   2453      line->w32[lineoff+1]   = otag;
   2454    }
   2455    //// END inlined, specialised version of MC_(helperc_b_store8)
   2456 }
   2457 
   2458 static INLINE
   2459 void make_aligned_word64_noaccess ( Addr a )
   2460 {
   2461    PROF_EVENT(330, "make_aligned_word64_noaccess");
   2462 
   2463 #ifndef PERF_FAST_STACK2
   2464    MC_(make_mem_noaccess)(a, 8);
   2465 #else
   2466    {
   2467       UWord   sm_off16;
   2468       SecMap* sm;
   2469 
   2470       if (UNLIKELY(a > MAX_PRIMARY_ADDRESS)) {
   2471          PROF_EVENT(331, "make_aligned_word64_noaccess-slow1");
   2472          MC_(make_mem_noaccess)(a, 8);
   2473          return;
   2474       }
   2475 
   2476       sm       = get_secmap_for_writing_low(a);
   2477       sm_off16 = SM_OFF_16(a);
   2478       ((UShort*)(sm->vabits8))[sm_off16] = VA_BITS16_NOACCESS;
   2479 
   2480       //// BEGIN inlined, specialised version of MC_(helperc_b_store8)
   2481       //// Clear the origins for a+0 .. a+7.
   2482       if (UNLIKELY( MC_(clo_mc_level) == 3 )) {
   2483          OCacheLine* line;
   2484          UWord lineoff = oc_line_offset(a);
   2485          tl_assert(lineoff >= 0
   2486                    && lineoff < OC_W32S_PER_LINE -1/*'cos 8-aligned*/);
   2487          line = find_OCacheLine( a );
   2488          line->descr[lineoff+0] = 0;
   2489          line->descr[lineoff+1] = 0;
   2490       }
   2491       //// END inlined, specialised version of MC_(helperc_b_store8)
   2492    }
   2493 #endif
   2494 }
   2495 
   2496 
   2497 /*------------------------------------------------------------*/
   2498 /*--- Stack pointer adjustment                             ---*/
   2499 /*------------------------------------------------------------*/
   2500 
   2501 #ifdef PERF_FAST_STACK
   2502 #  define MAYBE_USED
   2503 #else
   2504 #  define MAYBE_USED __attribute__((unused))
   2505 #endif
   2506 
   2507 /*--------------- adjustment by 4 bytes ---------------*/
   2508 
   2509 MAYBE_USED
   2510 static void VG_REGPARM(2) mc_new_mem_stack_4_w_ECU(Addr new_SP, UInt ecu)
   2511 {
   2512    UInt otag = ecu | MC_OKIND_STACK;
   2513    PROF_EVENT(110, "new_mem_stack_4");
   2514    if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
   2515       make_aligned_word32_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP, otag );
   2516    } else {
   2517       MC_(make_mem_undefined_w_otag) ( -VG_STACK_REDZONE_SZB + new_SP, 4, otag );
   2518    }
   2519 }
   2520 
   2521 MAYBE_USED
   2522 static void VG_REGPARM(1) mc_new_mem_stack_4(Addr new_SP)
   2523 {
   2524    PROF_EVENT(110, "new_mem_stack_4");
   2525    if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
   2526       make_aligned_word32_undefined ( -VG_STACK_REDZONE_SZB + new_SP );
   2527    } else {
   2528       make_mem_undefined ( -VG_STACK_REDZONE_SZB + new_SP, 4 );
   2529    }
   2530 }
   2531 
   2532 MAYBE_USED
   2533 static void VG_REGPARM(1) mc_die_mem_stack_4(Addr new_SP)
   2534 {
   2535    PROF_EVENT(120, "die_mem_stack_4");
   2536    if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
   2537       make_aligned_word32_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-4 );
   2538    } else {
   2539       MC_(make_mem_noaccess) ( -VG_STACK_REDZONE_SZB + new_SP-4, 4 );
   2540    }
   2541 }
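
/* Illustrative note, not part of the tool: for an SP decrease by 4 the
   word that becomes live starts at new_SP itself, whereas for an SP
   increase by 4 the word that dies is the one just below new_SP, i.e. at
   new_SP-4; in both cases the handlers shift the painted address down by
   VG_STACK_REDZONE_SZB so the ABI-mandated redzone below SP is tracked as
   well.  The same pattern repeats for the larger adjustment sizes below. */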
   2542 
   2543 /*--------------- adjustment by 8 bytes ---------------*/
   2544 
   2545 MAYBE_USED
   2546 static void VG_REGPARM(2) mc_new_mem_stack_8_w_ECU(Addr new_SP, UInt ecu)
   2547 {
   2548    UInt otag = ecu | MC_OKIND_STACK;
   2549    PROF_EVENT(111, "new_mem_stack_8");
   2550    if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
   2551       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP, otag );
   2552    } else if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
   2553       make_aligned_word32_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP  , otag );
   2554       make_aligned_word32_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+4, otag );
   2555    } else {
   2556       MC_(make_mem_undefined_w_otag) ( -VG_STACK_REDZONE_SZB + new_SP, 8, otag );
   2557    }
   2558 }
   2559 
   2560 MAYBE_USED
   2561 static void VG_REGPARM(1) mc_new_mem_stack_8(Addr new_SP)
   2562 {
   2563    PROF_EVENT(111, "new_mem_stack_8");
   2564    if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
   2565       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP );
   2566    } else if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
   2567       make_aligned_word32_undefined ( -VG_STACK_REDZONE_SZB + new_SP );
   2568       make_aligned_word32_undefined ( -VG_STACK_REDZONE_SZB + new_SP+4 );
   2569    } else {
   2570       make_mem_undefined ( -VG_STACK_REDZONE_SZB + new_SP, 8 );
   2571    }
   2572 }
   2573 
   2574 MAYBE_USED
   2575 static void VG_REGPARM(1) mc_die_mem_stack_8(Addr new_SP)
   2576 {
   2577    PROF_EVENT(121, "die_mem_stack_8");
   2578    if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
   2579       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-8 );
   2580    } else if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
   2581       make_aligned_word32_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-8 );
   2582       make_aligned_word32_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-4 );
   2583    } else {
   2584       MC_(make_mem_noaccess) ( -VG_STACK_REDZONE_SZB + new_SP-8, 8 );
   2585    }
   2586 }
   2587 
   2588 /*--------------- adjustment by 12 bytes ---------------*/
   2589 
   2590 MAYBE_USED
   2591 static void VG_REGPARM(2) mc_new_mem_stack_12_w_ECU(Addr new_SP, UInt ecu)
   2592 {
   2593    UInt otag = ecu | MC_OKIND_STACK;
   2594    PROF_EVENT(112, "new_mem_stack_12");
   2595    if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
   2596       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP  , otag );
   2597       make_aligned_word32_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+8, otag );
   2598    } else if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
   2599       /* From the previous test we don't have 8-alignment at offset +0,
   2600          hence we must have 8-alignment at offsets +4/-4.  Hence it is
   2601          safe to do 4 at +0 and then 8 at +4. */
   2602       make_aligned_word32_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP  , otag );
   2603       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+4, otag );
   2604    } else {
   2605       MC_(make_mem_undefined_w_otag) ( -VG_STACK_REDZONE_SZB + new_SP, 12, otag );
   2606    }
   2607 }
   2608 
   2609 MAYBE_USED
   2610 static void VG_REGPARM(1) mc_new_mem_stack_12(Addr new_SP)
   2611 {
   2612    PROF_EVENT(112, "new_mem_stack_12");
   2613    if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
   2614       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP );
   2615       make_aligned_word32_undefined ( -VG_STACK_REDZONE_SZB + new_SP+8 );
   2616    } else if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
   2617       /* From the previous test we don't have 8-alignment at offset +0,
   2618          hence we must have 8-alignment at offsets +4/-4.  Hence it is
   2619          safe to do 4 at +0 and then 8 at +4. */
   2620       make_aligned_word32_undefined ( -VG_STACK_REDZONE_SZB + new_SP );
   2621       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+4 );
   2622    } else {
   2623       make_mem_undefined ( -VG_STACK_REDZONE_SZB + new_SP, 12 );
   2624    }
   2625 }
   2626 
   2627 MAYBE_USED
   2628 static void VG_REGPARM(1) mc_die_mem_stack_12(Addr new_SP)
   2629 {
   2630    PROF_EVENT(122, "die_mem_stack_12");
   2631    /* Note the -12 in the test */
   2632    if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP-12 )) {
   2633       /* We have 8-alignment at -12, hence ok to do 8 at -12 and 4 at
   2634          -4. */
   2635       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-12 );
   2636       make_aligned_word32_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-4  );
   2637    } else if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
   2638       /* We have 4-alignment at +0, but we don't have 8-alignment at
   2639          -12.  So we must have 8-alignment at -8.  Hence do 4 at -12
   2640          and then 8 at -8. */
   2641       make_aligned_word32_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-12 );
   2642       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-8  );
   2643    } else {
   2644       MC_(make_mem_noaccess) ( -VG_STACK_REDZONE_SZB + new_SP-12, 12 );
   2645    }
   2646 }
   2647 
   2648 /*--------------- adjustment by 16 bytes ---------------*/
   2649 
   2650 MAYBE_USED
   2651 static void VG_REGPARM(2) mc_new_mem_stack_16_w_ECU(Addr new_SP, UInt ecu)
   2652 {
   2653    UInt otag = ecu | MC_OKIND_STACK;
   2654    PROF_EVENT(113, "new_mem_stack_16");
   2655    if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
   2656       /* Have 8-alignment at +0, hence do 8 at +0 and 8 at +8. */
   2657       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP  , otag );
   2658       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+8, otag );
   2659    } else if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
   2660       /* Have 4 alignment at +0 but not 8; hence 8 must be at +4.
   2661          Hence do 4 at +0, 8 at +4, 4 at +12. */
   2662       make_aligned_word32_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP   , otag );
   2663       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+4 , otag );
   2664       make_aligned_word32_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+12, otag );
   2665    } else {
   2666       MC_(make_mem_undefined_w_otag) ( -VG_STACK_REDZONE_SZB + new_SP, 16, otag );
   2667    }
   2668 }
   2669 
   2670 MAYBE_USED
   2671 static void VG_REGPARM(1) mc_new_mem_stack_16(Addr new_SP)
   2672 {
   2673    PROF_EVENT(113, "new_mem_stack_16");
   2674    if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
   2675       /* Have 8-alignment at +0, hence do 8 at +0 and 8 at +8. */
   2676       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP );
   2677       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+8 );
   2678    } else if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
   2679       /* Have 4 alignment at +0 but not 8; hence 8 must be at +4.
   2680          Hence do 4 at +0, 8 at +4, 4 at +12. */
   2681       make_aligned_word32_undefined ( -VG_STACK_REDZONE_SZB + new_SP );
   2682       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+4  );
   2683       make_aligned_word32_undefined ( -VG_STACK_REDZONE_SZB + new_SP+12 );
   2684    } else {
   2685       make_mem_undefined ( -VG_STACK_REDZONE_SZB + new_SP, 16 );
   2686    }
   2687 }
   2688 
   2689 MAYBE_USED
   2690 static void VG_REGPARM(1) mc_die_mem_stack_16(Addr new_SP)
   2691 {
   2692    PROF_EVENT(123, "die_mem_stack_16");
   2693    if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
   2694       /* Have 8-alignment at +0, hence do 8 at -16 and 8 at -8. */
   2695       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-16 );
   2696       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-8  );
   2697    } else if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
   2698       /* 8 alignment must be at -12.  Do 4 at -16, 8 at -12, 4 at -4. */
   2699       make_aligned_word32_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-16 );
   2700       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-12 );
   2701       make_aligned_word32_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-4  );
   2702    } else {
   2703       MC_(make_mem_noaccess) ( -VG_STACK_REDZONE_SZB + new_SP-16, 16 );
   2704    }
   2705 }
   2706 
   2707 /*--------------- adjustment by 32 bytes ---------------*/
   2708 
   2709 MAYBE_USED
   2710 static void VG_REGPARM(2) mc_new_mem_stack_32_w_ECU(Addr new_SP, UInt ecu)
   2711 {
   2712    UInt otag = ecu | MC_OKIND_STACK;
   2713    PROF_EVENT(114, "new_mem_stack_32");
   2714    if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
   2715       /* Straightforward */
   2716       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP   , otag );
   2717       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+8 , otag );
   2718       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+16, otag );
   2719       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+24, otag );
   2720    } else if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
   2721       /* 8 alignment must be at +4.  Hence do 8 at +4,+12,+20 and 4 at
   2722          +0,+28. */
   2723       make_aligned_word32_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP   , otag );
   2724       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+4 , otag );
   2725       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+12, otag );
   2726       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+20, otag );
   2727       make_aligned_word32_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+28, otag );
   2728    } else {
   2729       MC_(make_mem_undefined_w_otag) ( -VG_STACK_REDZONE_SZB + new_SP, 32, otag );
   2730    }
   2731 }
   2732 
   2733 MAYBE_USED
   2734 static void VG_REGPARM(1) mc_new_mem_stack_32(Addr new_SP)
   2735 {
   2736    PROF_EVENT(114, "new_mem_stack_32");
   2737    if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
   2738       /* Straightforward */
   2739       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP );
   2740       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+8 );
   2741       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+16 );
   2742       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+24 );
   2743    } else if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
   2744       /* 8 alignment must be at +4.  Hence do 8 at +4,+12,+20 and 4 at
   2745          +0,+28. */
   2746       make_aligned_word32_undefined ( -VG_STACK_REDZONE_SZB + new_SP );
   2747       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+4 );
   2748       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+12 );
   2749       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+20 );
   2750       make_aligned_word32_undefined ( -VG_STACK_REDZONE_SZB + new_SP+28 );
   2751    } else {
   2752       make_mem_undefined ( -VG_STACK_REDZONE_SZB + new_SP, 32 );
   2753    }
   2754 }
   2755 
   2756 MAYBE_USED
   2757 static void VG_REGPARM(1) mc_die_mem_stack_32(Addr new_SP)
   2758 {
   2759    PROF_EVENT(124, "die_mem_stack_32");
   2760    if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
   2761       /* Straightforward */
   2762       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-32 );
   2763       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-24 );
   2764       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-16 );
   2765       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP- 8 );
   2766    } else if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
   2767       /* 8 alignment must be at -4 etc.  Hence do 8 at -12,-20,-28 and
   2768          4 at -32,-4. */
   2769       make_aligned_word32_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-32 );
   2770       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-28 );
   2771       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-20 );
   2772       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-12 );
   2773       make_aligned_word32_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-4  );
   2774    } else {
   2775       MC_(make_mem_noaccess) ( -VG_STACK_REDZONE_SZB + new_SP-32, 32 );
   2776    }
   2777 }
   2778 
   2779 /*--------------- adjustment by 112 bytes ---------------*/
   2780 
   2781 MAYBE_USED
   2782 static void VG_REGPARM(2) mc_new_mem_stack_112_w_ECU(Addr new_SP, UInt ecu)
   2783 {
   2784    UInt otag = ecu | MC_OKIND_STACK;
   2785    PROF_EVENT(115, "new_mem_stack_112");
   2786    if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
   2787       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP   , otag );
   2788       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+8 , otag );
   2789       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+16, otag );
   2790       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+24, otag );
   2791       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+32, otag );
   2792       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+40, otag );
   2793       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+48, otag );
   2794       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+56, otag );
   2795       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+64, otag );
   2796       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+72, otag );
   2797       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+80, otag );
   2798       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+88, otag );
   2799       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+96, otag );
   2800       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+104, otag );
   2801    } else {
   2802       MC_(make_mem_undefined_w_otag) ( -VG_STACK_REDZONE_SZB + new_SP, 112, otag );
   2803    }
   2804 }
   2805 
   2806 MAYBE_USED
   2807 static void VG_REGPARM(1) mc_new_mem_stack_112(Addr new_SP)
   2808 {
   2809    PROF_EVENT(115, "new_mem_stack_112");
   2810    if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
   2811       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP );
   2812       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+8 );
   2813       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+16 );
   2814       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+24 );
   2815       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+32 );
   2816       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+40 );
   2817       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+48 );
   2818       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+56 );
   2819       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+64 );
   2820       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+72 );
   2821       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+80 );
   2822       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+88 );
   2823       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+96 );
   2824       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+104 );
   2825    } else {
   2826       make_mem_undefined ( -VG_STACK_REDZONE_SZB + new_SP, 112 );
   2827    }
   2828 }
   2829 
   2830 MAYBE_USED
   2831 static void VG_REGPARM(1) mc_die_mem_stack_112(Addr new_SP)
   2832 {
   2833    PROF_EVENT(125, "die_mem_stack_112");
   2834    if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
   2835       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-112);
   2836       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-104);
   2837       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-96 );
   2838       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-88 );
   2839       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-80 );
   2840       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-72 );
   2841       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-64 );
   2842       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-56 );
   2843       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-48 );
   2844       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-40 );
   2845       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-32 );
   2846       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-24 );
   2847       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-16 );
   2848       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP- 8 );
   2849    } else {
   2850       MC_(make_mem_noaccess) ( -VG_STACK_REDZONE_SZB + new_SP-112, 112 );
   2851    }
   2852 }
   2853 
   2854 /*--------------- adjustment by 128 bytes ---------------*/
   2855 
   2856 MAYBE_USED
   2857 static void VG_REGPARM(2) mc_new_mem_stack_128_w_ECU(Addr new_SP, UInt ecu)
   2858 {
   2859    UInt otag = ecu | MC_OKIND_STACK;
   2860    PROF_EVENT(116, "new_mem_stack_128");
   2861    if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
   2862       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP   , otag );
   2863       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+8 , otag );
   2864       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+16, otag );
   2865       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+24, otag );
   2866       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+32, otag );
   2867       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+40, otag );
   2868       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+48, otag );
   2869       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+56, otag );
   2870       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+64, otag );
   2871       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+72, otag );
   2872       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+80, otag );
   2873       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+88, otag );
   2874       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+96, otag );
   2875       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+104, otag );
   2876       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+112, otag );
   2877       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+120, otag );
   2878    } else {
   2879       MC_(make_mem_undefined_w_otag) ( -VG_STACK_REDZONE_SZB + new_SP, 128, otag );
   2880    }
   2881 }
   2882 
   2883 MAYBE_USED
   2884 static void VG_REGPARM(1) mc_new_mem_stack_128(Addr new_SP)
   2885 {
   2886    PROF_EVENT(116, "new_mem_stack_128");
   2887    if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
   2888       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP );
   2889       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+8 );
   2890       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+16 );
   2891       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+24 );
   2892       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+32 );
   2893       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+40 );
   2894       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+48 );
   2895       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+56 );
   2896       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+64 );
   2897       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+72 );
   2898       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+80 );
   2899       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+88 );
   2900       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+96 );
   2901       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+104 );
   2902       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+112 );
   2903       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+120 );
   2904    } else {
   2905       make_mem_undefined ( -VG_STACK_REDZONE_SZB + new_SP, 128 );
   2906    }
   2907 }
   2908 
   2909 MAYBE_USED
   2910 static void VG_REGPARM(1) mc_die_mem_stack_128(Addr new_SP)
   2911 {
   2912    PROF_EVENT(126, "die_mem_stack_128");
   2913    if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
   2914       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-128);
   2915       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-120);
   2916       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-112);
   2917       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-104);
   2918       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-96 );
   2919       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-88 );
   2920       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-80 );
   2921       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-72 );
   2922       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-64 );
   2923       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-56 );
   2924       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-48 );
   2925       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-40 );
   2926       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-32 );
   2927       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-24 );
   2928       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-16 );
   2929       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP- 8 );
   2930    } else {
   2931       MC_(make_mem_noaccess) ( -VG_STACK_REDZONE_SZB + new_SP-128, 128 );
   2932    }
   2933 }
   2934 
   2935 /*--------------- adjustment by 144 bytes ---------------*/
   2936 
   2937 MAYBE_USED
   2938 static void VG_REGPARM(2) mc_new_mem_stack_144_w_ECU(Addr new_SP, UInt ecu)
   2939 {
   2940    UInt otag = ecu | MC_OKIND_STACK;
   2941    PROF_EVENT(117, "new_mem_stack_144");
   2942    if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
   2943       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP,     otag );
   2944       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+8,   otag );
   2945       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+16,  otag );
   2946       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+24,  otag );
   2947       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+32,  otag );
   2948       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+40,  otag );
   2949       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+48,  otag );
   2950       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+56,  otag );
   2951       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+64,  otag );
   2952       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+72,  otag );
   2953       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+80,  otag );
   2954       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+88,  otag );
   2955       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+96,  otag );
   2956       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+104, otag );
   2957       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+112, otag );
   2958       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+120, otag );
   2959       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+128, otag );
   2960       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+136, otag );
   2961    } else {
   2962       MC_(make_mem_undefined_w_otag) ( -VG_STACK_REDZONE_SZB + new_SP, 144, otag );
   2963    }
   2964 }
   2965 
   2966 MAYBE_USED
   2967 static void VG_REGPARM(1) mc_new_mem_stack_144(Addr new_SP)
   2968 {
   2969    PROF_EVENT(117, "new_mem_stack_144");
   2970    if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
   2971       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP );
   2972       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+8 );
   2973       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+16 );
   2974       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+24 );
   2975       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+32 );
   2976       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+40 );
   2977       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+48 );
   2978       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+56 );
   2979       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+64 );
   2980       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+72 );
   2981       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+80 );
   2982       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+88 );
   2983       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+96 );
   2984       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+104 );
   2985       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+112 );
   2986       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+120 );
   2987       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+128 );
   2988       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+136 );
   2989    } else {
   2990       make_mem_undefined ( -VG_STACK_REDZONE_SZB + new_SP, 144 );
   2991    }
   2992 }
   2993 
   2994 MAYBE_USED
   2995 static void VG_REGPARM(1) mc_die_mem_stack_144(Addr new_SP)
   2996 {
   2997    PROF_EVENT(127, "die_mem_stack_144");
   2998    if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
   2999       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-144);
   3000       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-136);
   3001       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-128);
   3002       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-120);
   3003       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-112);
   3004       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-104);
   3005       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-96 );
   3006       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-88 );
   3007       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-80 );
   3008       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-72 );
   3009       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-64 );
   3010       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-56 );
   3011       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-48 );
   3012       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-40 );
   3013       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-32 );
   3014       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-24 );
   3015       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-16 );
   3016       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP- 8 );
   3017    } else {
   3018       MC_(make_mem_noaccess) ( -VG_STACK_REDZONE_SZB + new_SP-144, 144 );
   3019    }
   3020 }
   3021 
   3022 /*--------------- adjustment by 160 bytes ---------------*/
   3023 
   3024 MAYBE_USED
   3025 static void VG_REGPARM(2) mc_new_mem_stack_160_w_ECU(Addr new_SP, UInt ecu)
   3026 {
   3027    UInt otag = ecu | MC_OKIND_STACK;
   3028    PROF_EVENT(118, "new_mem_stack_160");
   3029    if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
   3030       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP,     otag );
   3031       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+8,   otag );
   3032       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+16,  otag );
   3033       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+24,  otag );
   3034       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+32,  otag );
   3035       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+40,  otag );
   3036       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+48,  otag );
   3037       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+56,  otag );
   3038       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+64,  otag );
   3039       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+72,  otag );
   3040       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+80,  otag );
   3041       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+88,  otag );
   3042       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+96,  otag );
   3043       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+104, otag );
   3044       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+112, otag );
   3045       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+120, otag );
   3046       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+128, otag );
   3047       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+136, otag );
   3048       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+144, otag );
   3049       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+152, otag );
   3050    } else {
   3051       MC_(make_mem_undefined_w_otag) ( -VG_STACK_REDZONE_SZB + new_SP, 160, otag );
   3052    }
   3053 }
   3054 
   3055 MAYBE_USED
   3056 static void VG_REGPARM(1) mc_new_mem_stack_160(Addr new_SP)
   3057 {
   3058    PROF_EVENT(118, "new_mem_stack_160");
   3059    if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
   3060       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP );
   3061       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+8 );
   3062       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+16 );
   3063       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+24 );
   3064       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+32 );
   3065       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+40 );
   3066       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+48 );
   3067       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+56 );
   3068       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+64 );
   3069       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+72 );
   3070       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+80 );
   3071       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+88 );
   3072       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+96 );
   3073       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+104 );
   3074       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+112 );
   3075       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+120 );
   3076       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+128 );
   3077       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+136 );
   3078       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+144 );
   3079       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+152 );
   3080    } else {
   3081       make_mem_undefined ( -VG_STACK_REDZONE_SZB + new_SP, 160 );
   3082    }
   3083 }
   3084 
   3085 MAYBE_USED
   3086 static void VG_REGPARM(1) mc_die_mem_stack_160(Addr new_SP)
   3087 {
   3088    PROF_EVENT(128, "die_mem_stack_160");
   3089    if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
   3090       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-160);
   3091       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-152);
   3092       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-144);
   3093       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-136);
   3094       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-128);
   3095       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-120);
   3096       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-112);
   3097       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-104);
   3098       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-96 );
   3099       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-88 );
   3100       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-80 );
   3101       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-72 );
   3102       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-64 );
   3103       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-56 );
   3104       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-48 );
   3105       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-40 );
   3106       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-32 );
   3107       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-24 );
   3108       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-16 );
   3109       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP- 8 );
   3110    } else {
   3111       MC_(make_mem_noaccess) ( -VG_STACK_REDZONE_SZB + new_SP-160, 160 );
   3112    }
   3113 }
   3114 
   3115 /*--------------- adjustment by N bytes ---------------*/
   3116 
   3117 static void mc_new_mem_stack_w_ECU ( Addr a, SizeT len, UInt ecu )
   3118 {
   3119    UInt otag = ecu | MC_OKIND_STACK;
   3120    PROF_EVENT(115, "new_mem_stack_w_otag");
   3121    MC_(make_mem_undefined_w_otag) ( -VG_STACK_REDZONE_SZB + a, len, otag );
   3122 }
   3123 
   3124 static void mc_new_mem_stack ( Addr a, SizeT len )
   3125 {
   3126    PROF_EVENT(115, "new_mem_stack");
   3127    make_mem_undefined ( -VG_STACK_REDZONE_SZB + a, len );
   3128 }
   3129 
   3130 static void mc_die_mem_stack ( Addr a, SizeT len )
   3131 {
   3132    PROF_EVENT(125, "die_mem_stack");
   3133    MC_(make_mem_noaccess) ( -VG_STACK_REDZONE_SZB + a, len );
   3134 }
   3135 
   3136 
   3137 /* The AMD64 ABI says:
   3138 
   3139    "The 128-byte area beyond the location pointed to by %rsp is considered
   3140     to be reserved and shall not be modified by signal or interrupt
   3141     handlers.  Therefore, functions may use this area for temporary data
   3142     that is not needed across function calls.  In particular, leaf functions
   3143     may use this area for their entire stack frame, rather than adjusting
   3144     the stack pointer in the prologue and epilogue.  This area is known as
   3145     red zone [sic]."
   3146 
   3147    So after any call or return we need to mark this redzone as containing
   3148    undefined values.
   3149 
   3150    Consider this:  we're in function f.  f calls g.  g moves rsp down
   3151    modestly (say 16 bytes) and writes stuff all over the red zone, making it
   3152    defined.  g returns.  f is buggy and reads from parts of the red zone
   3153    that it didn't write on.  But because g filled that area in, f is going
   3154    to be picking up defined V bits and so any errors from reading bits of
   3155    the red zone it didn't write, will be missed.  The only solution I could
   3156    think of was to make the red zone undefined when g returns to f.
   3157 
   3158    This is in accordance with the ABI, which makes it clear the redzone
   3159    is volatile across function calls.
   3160 
   3161    The problem occurs the other way round too: f could fill the RZ up
   3162    with defined values and g could mistakenly read them.  So the RZ
   3163    also needs to be nuked on function calls.
   3164 */
   3165 
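        /* A concrete sketch of that scenario (the rsp values below are made
           up purely for illustration):

              f:  rsp = 0x1000; f's red zone is [0xF80, 0x1000)
              f:  call g                  -- return addr pushed, rsp = 0xFF8
              g:  sub $16, %rsp           -- rsp = 0xFE8
              g:  writes over its red zone [0xF68, 0xFE8)
                                          -- those bytes become defined
              g:  ret                     -- rsp back to 0x1000
              f:  reads, say, 0xFD0       -- inside f's red zone, never
                                             written by f, yet looks defined

           Hence the need to re-mark the red zone as undefined around calls
           and returns; cf. the 128-byte fast path in
           MC_(helperc_MAKE_STACK_UNINIT) below. */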
   3166 
   3167 /* Here's a simple cache to hold nia -> ECU mappings.  It could be
   3168    improved so as to have a lower miss rate. */
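        /* Organisation: N_NIA_TO_ECU_CACHE sets, indexed by (nia modulo the
           table size), each holding two (nia, ecu) pairs.  A hit on the
           second pair swaps it to the front; a miss demotes the first pair
           to the second slot and installs the new mapping at the front.
           See convert_nia_to_ecu below. */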
   3169 
   3170 static UWord stats__nia_cache_queries = 0;
   3171 static UWord stats__nia_cache_misses  = 0;
   3172 
   3173 typedef
   3174    struct { UWord nia0; UWord ecu0;   /* nia0 maps to ecu0 */
   3175             UWord nia1; UWord ecu1; } /* nia1 maps to ecu1 */
   3176    WCacheEnt;
   3177 
   3178 #define N_NIA_TO_ECU_CACHE 511
   3179 
   3180 static WCacheEnt nia_to_ecu_cache[N_NIA_TO_ECU_CACHE];
   3181 
   3182 static void init_nia_to_ecu_cache ( void )
   3183 {
   3184    UWord       i;
   3185    Addr        zero_addr = 0;
   3186    ExeContext* zero_ec;
   3187    UInt        zero_ecu;
   3188    /* Fill all the slots with an entry for address zero, and the
   3189       relevant otags accordingly.  Hence the cache is initially filled
   3190       with valid data. */
   3191    zero_ec = VG_(make_depth_1_ExeContext_from_Addr)(zero_addr);
   3192    tl_assert(zero_ec);
   3193    zero_ecu = VG_(get_ECU_from_ExeContext)(zero_ec);
   3194    tl_assert(VG_(is_plausible_ECU)(zero_ecu));
   3195    for (i = 0; i < N_NIA_TO_ECU_CACHE; i++) {
   3196       nia_to_ecu_cache[i].nia0 = zero_addr;
   3197       nia_to_ecu_cache[i].ecu0 = zero_ecu;
   3198       nia_to_ecu_cache[i].nia1 = zero_addr;
   3199       nia_to_ecu_cache[i].ecu1 = zero_ecu;
   3200    }
   3201 }
   3202 
   3203 static inline UInt convert_nia_to_ecu ( Addr nia )
   3204 {
   3205    UWord i;
   3206    UInt        ecu;
   3207    ExeContext* ec;
   3208 
   3209    tl_assert( sizeof(nia_to_ecu_cache[0].nia1) == sizeof(nia) );
   3210 
   3211    stats__nia_cache_queries++;
   3212    i = nia % N_NIA_TO_ECU_CACHE;
   3213    tl_assert(i < N_NIA_TO_ECU_CACHE);  /* i is a UWord, so i >= 0 trivially */
   3214 
   3215    if (LIKELY( nia_to_ecu_cache[i].nia0 == nia ))
   3216       return nia_to_ecu_cache[i].ecu0;
   3217 
   3218    if (LIKELY( nia_to_ecu_cache[i].nia1 == nia )) {
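              /* Hit in the second slot: swap it to the front, so the most
                 recently used mapping is checked first next time. */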
   3219 #     define SWAP(_w1,_w2) { UWord _t = _w1; _w1 = _w2; _w2 = _t; }
   3220       SWAP( nia_to_ecu_cache[i].nia0, nia_to_ecu_cache[i].nia1 );
   3221       SWAP( nia_to_ecu_cache[i].ecu0, nia_to_ecu_cache[i].ecu1 );
   3222 #     undef SWAP
   3223       return nia_to_ecu_cache[i].ecu0;
   3224    }
   3225 
   3226    stats__nia_cache_misses++;
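           /* Miss: build a depth-1 ExeContext for nia, demote the current
              first slot to the second, and install the new mapping at the
              front. */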
   3227    ec = VG_(make_depth_1_ExeContext_from_Addr)(nia);
   3228    tl_assert(ec);
   3229    ecu = VG_(get_ECU_from_ExeContext)(ec);
   3230    tl_assert(VG_(is_plausible_ECU)(ecu));
   3231 
   3232    nia_to_ecu_cache[i].nia1 = nia_to_ecu_cache[i].nia0;
   3233    nia_to_ecu_cache[i].ecu1 = nia_to_ecu_cache[i].ecu0;
   3234 
   3235    nia_to_ecu_cache[i].nia0 = nia;
   3236    nia_to_ecu_cache[i].ecu0 = (UWord)ecu;
   3237    return ecu;
   3238 }
   3239 
   3240 
   3241 /* Note that this serves both the origin-tracking and
   3242    no-origin-tracking modes.  We assume that calls to it are
   3243    sufficiently infrequent that it isn't worth specialising for the
   3244    with/without origin-tracking cases. */
   3245 void MC_(helperc_MAKE_STACK_UNINIT) ( Addr base, UWord len, Addr nia )
   3246 {
   3247    UInt otag;
   3248    tl_assert(sizeof(UWord) == sizeof(SizeT));
   3249    if (0)
   3250       VG_(printf)("helperc_MAKE_STACK_UNINIT (%#lx,%lu,nia=%#lx)\n",
   3251                   base, len, nia );
   3252 
   3253    if (UNLIKELY( MC_(clo_mc_level) == 3 )) {
   3254       UInt ecu = convert_nia_to_ecu ( nia );
   3255       tl_assert(VG_(is_plausible_ECU)(ecu));
   3256       otag = ecu | MC_OKIND_STACK;
   3257    } else {
   3258       tl_assert(nia == 0);
   3259       otag = 0;
   3260    }
   3261 
   3262 #  if 0
   3263    /* Really slow version */
   3264    MC_(make_mem_undefined)(base, len, otag);
   3265 #  endif
   3266 
   3267 #  if 0
   3268    /* Slow(ish) version, which is fairly easily seen to be correct.
   3269    */
   3270    if (LIKELY( VG_IS_8_ALIGNED(base) && len==128 )) {
   3271       make_aligned_word64_undefined(base +   0, otag);
   3272       make_aligned_word64_undefined(base +   8, otag);
   3273       make_aligned_word64_undefined(base +  16, otag);
   3274       make_aligned_word64_undefined(base +  24, otag);
   3275 
   3276       make_aligned_word64_undefined(base +  32, otag);
   3277       make_aligned_word64_undefined(base +  40, otag);
   3278       make_aligned_word64_undefined(base +  48, otag);
   3279       make_aligned_word64_undefined(base +  56, otag);
   3280 
   3281       make_aligned_word64_undefined(base +  64, otag);
   3282       make_aligned_word64_undefined(base +  72, otag);
   3283       make_aligned_word64_undefined(base +  80, otag);
   3284       make_aligned_word64_undefined(base +  88, otag);
   3285 
   3286       make_aligned_word64_undefined(base +  96, otag);
   3287       make_aligned_word64_undefined(base + 104, otag);
   3288       make_aligned_word64_undefined(base + 112, otag);
   3289       make_aligned_word64_undefined(base + 120, otag);
   3290    } else {
   3291       MC_(make_mem_undefined)(base, len, otag);
   3292    }
   3293 #  endif
   3294 
   3295    /* Idea is: go fast when
   3296          * 8-aligned and length is 128
   3297          * the sm is available in the main primary map
   3298          * the address range falls entirely within a single secondary map
   3299       If all those conditions hold, just update the V+A bits by writing
   3300       directly into the vabits array.  (If the sm was distinguished, this
   3301       will make a copy and then write to it.)
   3302    */
   3303 
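           /* The 128-byte case matches the size of the amd64 red zone
              described above (cf. the 288-byte ELF ppc64 case below). */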
   3304    if (LIKELY( len == 128 && VG_IS_8_ALIGNED(base) )) {
   3305       /* Now we know the address range is suitably sized and aligned. */
   3306       UWord a_lo = (UWord)(base);
   3307       UWord a_hi = (UWord)(base + 128 - 1);
   3308       tl_assert(a_lo < a_hi);             // paranoia: detect overflow
   3309       if (a_hi <= MAX_PRIMARY_ADDRESS) {
   3310          // Now we know the entire range is within the main primary map.
   3311          SecMap* sm    = get_secmap_for_writing_low(a_lo);
   3312          SecMap* sm_hi = get_secmap_for_writing_low(a_hi);
   3313          /* Now we know that the entire address range falls within a
   3314             single secondary map, and that that secondary 'lives' in
   3315             the main primary map. */
   3316          if (LIKELY(sm == sm_hi)) {
   3317             // Finally, we know that the range is entirely within one secmap.
   3318             UWord   v_off = SM_OFF(a_lo);
   3319             UShort* p     = (UShort*)(&sm->vabits8[v_off]);
   3320             p[ 0] = VA_BITS16_UNDEFINED;
   3321             p[ 1] = VA_BITS16_UNDEFINED;
   3322             p[ 2] = VA_BITS16_UNDEFINED;
   3323             p[ 3] = VA_BITS16_UNDEFINED;
   3324             p[ 4] = VA_BITS16_UNDEFINED;
   3325             p[ 5] = VA_BITS16_UNDEFINED;
   3326             p[ 6] = VA_BITS16_UNDEFINED;
   3327             p[ 7] = VA_BITS16_UNDEFINED;
   3328             p[ 8] = VA_BITS16_UNDEFINED;
   3329             p[ 9] = VA_BITS16_UNDEFINED;
   3330             p[10] = VA_BITS16_UNDEFINED;
   3331             p[11] = VA_BITS16_UNDEFINED;
   3332             p[12] = VA_BITS16_UNDEFINED;
   3333             p[13] = VA_BITS16_UNDEFINED;
   3334             p[14] = VA_BITS16_UNDEFINED;
   3335             p[15] = VA_BITS16_UNDEFINED;
   3336             if (UNLIKELY( MC_(clo_mc_level) == 3 )) {
   3337                set_aligned_word64_Origin_to_undef( base + 8 * 0, otag );
   3338                set_aligned_word64_Origin_to_undef( base + 8 * 1, otag );
   3339                set_aligned_word64_Origin_to_undef( base + 8 * 2, otag );
   3340                set_aligned_word64_Origin_to_undef( base + 8 * 3, otag );
   3341                set_aligned_word64_Origin_to_undef( base + 8 * 4, otag );
   3342                set_aligned_word64_Origin_to_undef( base + 8 * 5, otag );
   3343                set_aligned_word64_Origin_to_undef( base + 8 * 6, otag );
   3344                set_aligned_word64_Origin_to_undef( base + 8 * 7, otag );
   3345                set_aligned_word64_Origin_to_undef( base + 8 * 8, otag );
   3346                set_aligned_word64_Origin_to_undef( base + 8 * 9, otag );
   3347                set_aligned_word64_Origin_to_undef( base + 8 * 10, otag );
   3348                set_aligned_word64_Origin_to_undef( base + 8 * 11, otag );
   3349                set_aligned_word64_Origin_to_undef( base + 8 * 12, otag );
   3350                set_aligned_word64_Origin_to_undef( base + 8 * 13, otag );
   3351                set_aligned_word64_Origin_to_undef( base + 8 * 14, otag );
   3352                set_aligned_word64_Origin_to_undef( base + 8 * 15, otag );
   3353             }
   3354             return;
   3355          }
   3356       }
   3357    }
   3358 
   3359    /* 288 bytes (36 ULongs) is the magic value for ELF ppc64. */
   3360    if (LIKELY( len == 288 && VG_IS_8_ALIGNED(base) )) {
   3361       /* Now we know the address range is suitably sized and aligned. */
   3362       UWord a_lo = (UWord)(base);
   3363       UWord a_hi = (UWord)(base + 288 - 1);
   3364       tl_assert(a_lo < a_hi);             // paranoia: detect overflow
   3365       if (a_hi <= MAX_PRIMARY_ADDRESS) {
   3366          // Now we know the entire range is within the main primary map.
   3367          SecMap* sm    = get_secmap_for_writing_low(a_lo);
   3368          SecMap* sm_hi = get_secmap_for_writing_low(a_hi);
   3369          /* Now we know that the entire address range falls within a
   3370             single secondary map, and that that secondary 'lives' in
   3371             the main primary map. */
   3372          if (LIKELY(sm == sm_hi)) {
   3373             // Finally, we know that the range is entirely within one secmap.
   3374             UWord   v_off = SM_OFF(a_lo);
   3375             UShort* p     = (UShort*)(&sm->vabits8[v_off]);
   3376             p[ 0] = VA_BITS16_UNDEFINED;
   3377             p[ 1] = VA_BITS16_UNDEFINED;
   3378             p[ 2] = VA_BITS16_UNDEFINED;
   3379             p[ 3] = VA_BITS16_UNDEFINED;
   3380             p[ 4] = VA_BITS16_UNDEFINED;
   3381             p[ 5] = VA_BITS16_UNDEFINED;
   3382             p[ 6] = VA_BITS16_UNDEFINED;
   3383             p[ 7] = VA_BITS16_UNDEFINED;
   3384             p[ 8] = VA_BITS16_UNDEFINED;
   3385             p[ 9] = VA_BITS16_UNDEFINED;
   3386             p[10] = VA_BITS16_UNDEFINED;
   3387             p[11] = VA_BITS16_UNDEFINED;
   3388             p[12] = VA_BITS16_UNDEFINED;
   3389             p[13] = VA_BITS16_UNDEFINED;
   3390             p[14] = VA_BITS16_UNDEFINED;
   3391             p[15] = VA_BITS16_UNDEFINED;
   3392             p[16] = VA_BITS16_UNDEFINED;
   3393             p[17] = VA_BITS16_UNDEFINED;
   3394             p[18] = VA_BITS16_UNDEFINED;
   3395             p[19] = VA_BITS16_UNDEFINED;
   3396             p[20] = VA_BITS16_UNDEFINED;
   3397             p[21] = VA_BITS16_UNDEFINED;
   3398             p[22] = VA_BITS16_UNDEFINED;
   3399             p[23] = VA_BITS16_UNDEFINED;
   3400             p[24] = VA_BITS16_UNDEFINED;
   3401             p[25] = VA_BITS16_UNDEFINED;
   3402             p[26] = VA_BITS16_UNDEFINED;
   3403             p[27] = VA_BITS16_UNDEFINED;
   3404             p[28] = VA_BITS16_UNDEFINED;
   3405             p[29] = VA_BITS16_UNDEFINED;
   3406             p[30] = VA_BITS16_UNDEFINED;
   3407             p[31] = VA_BITS16_UNDEFINED;
   3408             p[32] = VA_BITS16_UNDEFINED;
   3409             p[33] = VA_BITS16_UNDEFINED;
   3410             p[34] = VA_BITS16_UNDEFINED;
   3411             p[35] = VA_BITS16_UNDEFINED;
   3412             if (UNLIKELY( MC_(clo_mc_level) == 3 )) {
   3413                set_aligned_word64_Origin_to_undef( base + 8 * 0, otag );
   3414                set_aligned_word64_Origin_to_undef( base + 8 * 1, otag );
   3415                set_aligned_word64_Origin_to_undef( base + 8 * 2, otag );
   3416                set_aligned_word64_Origin_to_undef( base + 8 * 3, otag );
   3417                set_aligned_word64_Origin_to_undef( base + 8 * 4, otag );
   3418                set_aligned_word64_Origin_to_undef( base + 8 * 5, otag );
   3419                set_aligned_word64_Origin_to_undef( base + 8 * 6, otag );
   3420                set_aligned_word64_Origin_to_undef( base + 8 * 7, otag );
   3421                set_aligned_word64_Origin_to_undef( base + 8 * 8, otag );
   3422                set_aligned_word64_Origin_to_undef( base + 8 * 9, otag );
   3423                set_aligned_word64_Origin_to_undef( base + 8 * 10, otag );
   3424                set_aligned_word64_Origin_to_undef( base + 8 * 11, otag );
   3425                set_aligned_word64_Origin_to_undef( base + 8 * 12, otag );
   3426                set_aligned_word64_Origin_to_undef( base + 8 * 13, otag );
   3427                set_aligned_word64_Origin_to_undef( base + 8 * 14, otag );
   3428                set_aligned_word64_Origin_to_undef( base + 8 * 15, otag );
   3429                set_aligned_word64_Origin_to_undef( base + 8 * 16, otag );
   3430                set_aligned_word64_Origin_to_undef( base + 8 * 17, otag );
   3431                set_aligned_word64_Origin_to_undef( base + 8 * 18, otag );
   3432                set_aligned_word64_Origin_to_undef( base + 8 * 19, otag );
   3433                set_aligned_word64_Origin_to_undef( base + 8 * 20, otag );
   3434                set_aligned_word64_Origin_to_undef( base + 8 * 21, otag );
   3435                set_aligned_word64_Origin_to_undef( base + 8 * 22, otag );
   3436                set_aligned_word64_Origin_to_undef( base + 8 * 23, otag );
   3437                set_aligned_word64_Origin_to_undef( base + 8 * 24, otag );
   3438                set_aligned_word64_Origin_to_undef( base + 8 * 25, otag );
   3439                set_aligned_word64_Origin_to_undef( base + 8 * 26, otag );
   3440                set_aligned_word64_Origin_to_undef( base + 8 * 27, otag );
   3441                set_aligned_word64_Origin_to_undef( base + 8 * 28, otag );
   3442                set_aligned_word64_Origin_to_undef( base + 8 * 29, otag );
   3443                set_aligned_word64_Origin_to_undef( base + 8 * 30, otag );
   3444                set_aligned_word64_Origin_to_undef( base + 8 * 31, otag );
   3445                set_aligned_word64_Origin_to_undef( base + 8 * 32, otag );
   3446                set_aligned_word64_Origin_to_undef( base + 8 * 33, otag );
   3447                set_aligned_word64_Origin_to_undef( base + 8 * 34, otag );
   3448                set_aligned_word64_Origin_to_undef( base + 8 * 35, otag );
   3449             }
   3450             return;
   3451          }
   3452       }
   3453    }
   3454 
   3455    /* else fall into slow case */
   3456    MC_(make_mem_undefined_w_otag)(base, len, otag);
   3457 }
   3458 
   3459 
   3460 /*------------------------------------------------------------*/
   3461 /*--- Checking memory                                      ---*/
   3462 /*------------------------------------------------------------*/
   3463 
   3464 typedef
   3465    enum {
   3466       MC_Ok = 5,
   3467       MC_AddrErr = 6,
   3468       MC_ValueErr = 7
   3469    }
   3470    MC_ReadResult;
   3471 
   3472 
   3473 /* Check permissions for address range.  If inadequate permissions
   3474    exist, *bad_addr is set to the offending address, so the caller can
   3475    know what it is. */
   3476 
   3477 /* Returns True if [a .. a+len) is not addressable.  Otherwise,
   3478    returns False, and if bad_addr is non-NULL, sets *bad_addr to
   3479    indicate the lowest failing address.  Functions below are
   3480    similar. */
   3481 Bool MC_(check_mem_is_noaccess) ( Addr a, SizeT len, Addr* bad_addr )
   3482 {
   3483    SizeT i;
   3484    UWord vabits2;
   3485 
   3486    PROF_EVENT(60, "check_mem_is_noaccess");
   3487    for (i = 0; i < len; i++) {
   3488       PROF_EVENT(61, "check_mem_is_noaccess(loop)");
   3489       vabits2 = get_vabits2(a);
   3490       if (VA_BITS2_NOACCESS != vabits2) {
   3491          if (bad_addr != NULL) *bad_addr = a;
   3492          return False;
   3493       }
   3494       a++;
   3495    }
   3496    return True;
   3497 }
   3498 
   3499 static Bool is_mem_addressable ( Addr a, SizeT len,
   3500                                  /*OUT*/Addr* bad_addr )
   3501 {
   3502    SizeT i;
   3503    UWord vabits2;
   3504 
   3505    PROF_EVENT(62, "is_mem_addressable");
   3506    for (i = 0; i < len; i++) {
   3507       PROF_EVENT(63, "is_mem_addressable(loop)");
   3508       vabits2 = get_vabits2(a);
   3509       if (VA_BITS2_NOACCESS == vabits2) {
   3510          if (bad_addr != NULL) *bad_addr = a;
   3511          return False;
   3512       }
   3513       a++;
   3514    }
   3515    return True;
   3516 }
   3517 
   3518 static MC_ReadResult is_mem_defined ( Addr a, SizeT len,
   3519                                       /*OUT*/Addr* bad_addr,
   3520                                       /*OUT*/UInt* otag )
   3521 {
   3522    SizeT i;
   3523    UWord vabits2;
   3524 
   3525    PROF_EVENT(64, "is_mem_defined");
   3526    DEBUG("is_mem_defined\n");
   3527 
   3528    if (otag)     *otag = 0;
   3529    if (bad_addr) *bad_addr = 0;
   3530    for (i = 0; i < len; i++) {
   3531       PROF_EVENT(65, "is_mem_defined(loop)");
   3532       vabits2 = get_vabits2(a);
   3533       if (VA_BITS2_DEFINED != vabits2) {
   3534          // Error!  Nb: Report addressability errors in preference to
   3535          // definedness errors.  And don't report definedness errors unless
   3536          // --undef-value-errors=yes.
   3537          if (bad_addr) {
   3538             *bad_addr = a;
   3539          }
   3540          if (VA_BITS2_NOACCESS == vabits2) {
   3541             return MC_AddrErr;
   3542          }
   3543          if (MC_(clo_mc_level) >= 2) {
   3544             if (otag && MC_(clo_mc_level) == 3) {
   3545                *otag = MC_(helperc_b_load1)( a );
   3546             }
   3547             return MC_ValueErr;
   3548          }
   3549       }
   3550       a++;
   3551    }
   3552    return MC_Ok;
   3553 }
   3554 
   3555 
   3556 /* Like is_mem_defined but doesn't give up at the first uninitialised
   3557    byte -- the entire range is always checked.  This is important for
   3558    detecting errors in the case where a checked range strays into
   3559    invalid memory, but that fact is not detected by the ordinary
   3560    is_mem_defined(), because of an undefined section that precedes the
   3561    out of range section, possibly as a result of an alignment hole in
   3562    the checked data.  This version always checks the entire range and
   3563    can report both a definedness and an accessbility error, if
   3564    necessary. */
   3565 static void is_mem_defined_comprehensive (
   3566                Addr a, SizeT len,
   3567                /*OUT*/Bool* errorV,    /* is there a definedness err? */
   3568                /*OUT*/Addr* bad_addrV, /* if so where? */
   3569                /*OUT*/UInt* otagV,     /* and what's its otag? */
   3570                /*OUT*/Bool* errorA,    /* is there an addressability err? */
   3571                /*OUT*/Addr* bad_addrA  /* if so where? */
   3572             )
   3573 {
   3574    SizeT i;
   3575    UWord vabits2;
   3576    Bool  already_saw_errV = False;
   3577 
   3578    PROF_EVENT(64, "is_mem_defined"); // fixme
   3579    DEBUG("is_mem_defined_comprehensive\n");
   3580 
   3581    tl_assert(!(*errorV || *errorA));
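           /* The caller is expected to have initialised *errorV and *errorA
              to False; the assertion below checks that. */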
   3582 
   3583    for (i = 0; i < len; i++) {
   3584       PROF_EVENT(65, "is_mem_defined(loop)"); // fixme
   3585       vabits2 = get_vabits2(a);
   3586       switch (vabits2) {
   3587          case VA_BITS2_DEFINED:
   3588             a++;
   3589             break;
   3590          case VA_BITS2_UNDEFINED:
   3591          case VA_BITS2_PARTDEFINED:
   3592             if (!already_saw_errV) {
   3593                *errorV    = True;
   3594                *bad_addrV = a;
   3595                if (MC_(clo_mc_level) == 3) {
   3596                   *otagV = MC_(helperc_b_load1)( a );
   3597                } else {
   3598                   *otagV = 0;
   3599                }
   3600                already_saw_errV = True;
   3601             }
   3602             a++; /* keep going */
   3603             break;
   3604          case VA_BITS2_NOACCESS:
   3605             *errorA    = True;
   3606             *bad_addrA = a;
   3607             return; /* give up now. */
   3608          default:
   3609             tl_assert(0);
   3610       }
   3611    }
   3612 }
   3613 
   3614 
   3615 /* Check a zero-terminated ascii string.  Tricky -- don't want to
   3616    examine the actual bytes, to find the end, until we're sure it is
   3617    safe to do so. */
   3618 
   3619 static MC_ReadResult mc_is_defined_asciiz ( Addr a, Addr* bad_addr, UInt* otag )
   3620 {
   3621    UWord vabits2;
   3622 
   3623    PROF_EVENT(66, "mc_is_defined_asciiz");
   3624    DEBUG("mc_is_defined_asciiz\n");
   3625 
   3626    if (otag)     *otag = 0;
   3627    if (bad_addr) *bad_addr = 0;
   3628    while (True) {
   3629       PROF_EVENT(67, "mc_is_defined_asciiz(loop)");
   3630       vabits2 = get_vabits2(a);
   3631       if (VA_BITS2_DEFINED != vabits2) {
   3632          // Error!  Nb: Report addressability errors in preference to
   3633          // definedness errors.  And don't report definedness errors unless
   3634          // --undef-value-errors=yes.
   3635          if (bad_addr) {
   3636             *bad_addr = a;
   3637          }
   3638          if (VA_BITS2_NOACCESS == vabits2) {
   3639             return MC_AddrErr;
   3640          }
   3641          if (MC_(clo_mc_level) >= 2) {
   3642             if (otag && MC_(clo_mc_level) == 3) {
   3643                *otag = MC_(helperc_b_load1)( a );
   3644             }
   3645             return MC_ValueErr;
   3646          }
   3647       }
   3648       /* Ok, a is safe to read. */
   3649       if (* ((UChar*)a) == 0) {
   3650          return MC_Ok;
   3651       }
   3652       a++;
   3653    }
   3654 }
   3655 
   3656 
   3657 /*------------------------------------------------------------*/
   3658 /*--- Memory event handlers                                ---*/
   3659 /*------------------------------------------------------------*/
   3660 
   3661 static
   3662 void check_mem_is_addressable ( CorePart part, ThreadId tid, Char* s,
   3663                                 Addr base, SizeT size )
   3664 {
   3665    Addr bad_addr;
   3666    Bool ok = is_mem_addressable ( base, size, &bad_addr );
   3667 
   3668    if (!ok) {
   3669       switch (part) {
   3670       case Vg_CoreSysCall:
   3671          MC_(record_memparam_error) ( tid, bad_addr,
   3672                                       /*isAddrErr*/True, s, 0/*otag*/ );
   3673          break;
   3674 
   3675       case Vg_CoreSignal:
   3676          MC_(record_core_mem_error)( tid, s );
   3677          break;
   3678 
   3679       default:
   3680          VG_(tool_panic)("check_mem_is_addressable: unexpected CorePart");
   3681       }
   3682    }
   3683 }
   3684 
   3685 static
   3686 void check_mem_is_defined ( CorePart part, ThreadId tid, Char* s,
   3687                             Addr base, SizeT size )
   3688 {
   3689    UInt otag = 0;
   3690    Addr bad_addr;
   3691    MC_ReadResult res = is_mem_defined ( base, size, &bad_addr, &otag );
   3692 
   3693    if (MC_Ok != res) {
   3694       Bool isAddrErr = ( MC_AddrErr == res ? True : False );
   3695 
   3696       switch (part) {
   3697       case Vg_CoreSysCall:
   3698          MC_(record_memparam_error) ( tid, bad_addr, isAddrErr, s,
   3699                                       isAddrErr ? 0 : otag );
   3700          break;
   3701 
   3702       case Vg_CoreSysCallArgInMem:
   3703          MC_(record_regparam_error) ( tid, s, otag );
   3704          break;
   3705 
   3706       /* If we're being asked to jump to a silly address, record an error
   3707          message before potentially crashing the entire system. */
   3708       case Vg_CoreTranslate:
   3709          MC_(record_jump_error)( tid, bad_addr );
   3710          break;
   3711 
   3712       default:
   3713          VG_(tool_panic)("check_mem_is_defined: unexpected CorePart");
   3714       }
   3715    }
   3716 }
   3717 
   3718 static
   3719 void check_mem_is_defined_asciiz ( CorePart part, ThreadId tid,
   3720                                    Char* s, Addr str )
   3721 {
   3722    MC_ReadResult res;
   3723    Addr bad_addr = 0;   // shut GCC up
   3724    UInt otag = 0;
   3725 
   3726    tl_assert(part == Vg_CoreSysCall);
   3727    res = mc_is_defined_asciiz ( (Addr)str, &bad_addr, &otag );
   3728    if (MC_Ok != res) {
   3729       Bool isAddrErr = ( MC_AddrErr == res ? True : False );
   3730       MC_(record_memparam_error) ( tid, bad_addr, isAddrErr, s,
   3731                                    isAddrErr ? 0 : otag );
   3732    }
   3733 }
   3734 
   3735 /* Handling of mmap and mprotect is not as simple as it seems.
   3736 
   3737    The underlying semantics are that memory obtained from mmap is
   3738    always initialised, but may be inaccessible.  And changes to the
   3739    protection of memory do not change its contents and hence not its
   3740    definedness state.  Problem is we can't model
   3741    inaccessible-but-with-some-definedness state; once we mark memory
   3742    as inaccessible we lose all info about definedness, and so can't
   3743    restore that if it is later made accessible again.
   3744 
   3745    One obvious thing to do is this:
   3746 
   3747       mmap/mprotect NONE  -> noaccess
   3748       mmap/mprotect other -> defined
   3749 
   3750    The problem case here is: taking accessible memory, writing
   3751    uninitialised data to it, mprotecting it NONE and later mprotecting
   3752    it back to some accessible state causes the undefinedness to be
   3753    lost.
   3754 
   3755    A better proposal is:
   3756 
   3757      (1) mmap NONE       ->  make noaccess
   3758      (2) mmap other      ->  make defined
   3759 
   3760      (3) mprotect NONE   ->  # no change
   3761      (4) mprotect other  ->  change any "noaccess" to "defined"
   3762 
   3763    (2) is OK because memory newly obtained from mmap really is defined
   3764        (zeroed out by the kernel -- doing anything else would
   3765        constitute a massive security hole.)
   3766 
   3767    (1) is OK because the only way to make the memory usable is via
   3768        (4), in which case we also wind up correctly marking it all as
   3769        defined.
   3770 
   3771    (3) is the weak case.  We choose not to change memory state
   3772        (presumably the range is in some mixture of "defined" and
   3773        "undefined", viz, accessible but with arbitrary V bits).  Doing
   3774        nothing means we retain the V bits, so that if the memory is
   3775        later mprotected "other", the V bits remain unchanged, so there
   3776        can be no false negatives.  The bad effect is that if there's
   3777        an access in the area, then MC cannot warn; but at least we'll
   3778        get a SEGV to show, so it's better than nothing.
   3779 
   3780    Consider the sequence (3) followed by (4).  Any memory that was
   3781    "defined" or "undefined" previously retains its state (as
   3782    required).  Any memory that was "noaccess" before can only have
   3783    been made that way by (1), and so it's OK to change it to
   3784    "defined".
   3785 
   3786    See https://bugs.kde.org/show_bug.cgi?id=205541
   3787    and https://bugs.kde.org/show_bug.cgi?id=210268
   3788 */
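        /* Worked example of the above (illustrative): suppose a page is
           mmap'd RW (rule (2), hence "defined"), partially overwritten
           with uninitialised data, then mprotect'd NONE and later
           mprotect'd back to RW.  Rule (3) leaves the V bits untouched
           across the NONE step, and rule (4) only upgrades bytes that are
           currently "noaccess", so the undefinedness survives, as
           required. */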
   3789 static
   3790 void mc_new_mem_mmap ( Addr a, SizeT len, Bool rr, Bool ww, Bool xx,
   3791                        ULong di_handle )
   3792 {
   3793    if (rr || ww || xx) {
   3794       /* (2) mmap/mprotect other -> defined */
   3795       MC_(make_mem_defined)(a, len);
   3796    } else {
   3797       /* (1) mmap/mprotect NONE  -> noaccess */
   3798       MC_(make_mem_noaccess)(a, len);
   3799    }
   3800 }
   3801 
   3802 static
   3803 void mc_new_mem_mprotect ( Addr a, SizeT len, Bool rr, Bool ww, Bool xx )
   3804 {
   3805    if (rr || ww || xx) {
   3806       /* (4) mprotect other  ->  change any "noaccess" to "defined" */
   3807       make_mem_defined_if_noaccess(a, len);
   3808    } else {
   3809       /* (3) mprotect NONE   ->  # no change */
   3810       /* do nothing */
   3811    }
   3812 }
   3813 
   3814 
   3815 static
   3816 void mc_new_mem_startup( Addr a, SizeT len,
   3817                          Bool rr, Bool ww, Bool xx, ULong di_handle )
   3818 {
   3819    // Code is defined; initialised variables get put in the data
   3820    // segment and are defined; and uninitialised variables get put in the
   3821    // bss segment and are auto-zeroed (and so defined).
   3822    //
   3823    // It's possible that there will be padding between global variables.
   3824    // This will also be auto-zeroed, and marked as defined by Memcheck.  If
   3825    // a program uses it, Memcheck will not complain.  This is arguably a
   3826    // false negative, but it's a grey area -- the behaviour is defined (the
   3827    // padding is zeroed) but it's probably not what the user intended.  And
   3828    // we can't avoid it.
   3829    //
   3830    // Note: we generally ignore RWX permissions, because we can't track them
   3831    // without requiring more than one A bit which would slow things down a
   3832    // lot.  But on Darwin the 0th page is mapped but !R and !W and !X.
   3833    // So we mark any such pages as "unaddressable".
   3834    DEBUG("mc_new_mem_startup(%#lx, %llu, rr=%u, ww=%u, xx=%u)\n",
   3835          a, (ULong)len, rr, ww, xx);
   3836    mc_new_mem_mmap(a, len, rr, ww, xx, di_handle);
   3837 }
   3838 
   3839 static
   3840 void mc_post_mem_write(CorePart part, ThreadId tid, Addr a, SizeT len)
   3841 {
   3842    MC_(make_mem_defined)(a, len);
   3843 }
   3844 
   3845 
   3846 /*------------------------------------------------------------*/
   3847 /*--- Register event handlers                              ---*/
   3848 /*------------------------------------------------------------*/
   3849 
   3850 /* Try and get a nonzero origin for the guest state section of thread
   3851    tid characterised by (offset,size).  Return 0 if nothing to show
   3852    for it. */
   3853 static UInt mb_get_origin_for_guest_offset ( ThreadId tid,
   3854                                              Int offset, SizeT size )
   3855 {
   3856    Int   sh2off;
   3857    UChar area[6];
   3858    UInt  otag;
   3859    sh2off = MC_(get_otrack_shadow_offset)( offset, size );
   3860    if (sh2off == -1)
   3861       return 0;  /* This piece of guest state is not tracked */
   3862    tl_assert(sh2off >= 0);
   3863    tl_assert(0 == (sh2off % 4));
   3864    area[0] = 0x31;
   3865    area[5] = 0x27;
   3866    VG_(get_shadow_regs_area)( tid, &area[1], 2/*shadowno*/,sh2off,4 );
   3867    tl_assert(area[0] == 0x31);
   3868    tl_assert(area[5] == 0x27);
   3869    otag = *(UInt*)&area[1];
   3870    return otag;
   3871 }
   3872 
   3873 
   3874 /* When some chunk of guest state is written, mark the corresponding
   3875    shadow area as valid.  This is used to initialise arbitrarily large
   3876    chunks of guest state, hence the _SIZE value, which has to be as
   3877    big as the biggest guest state.
   3878 */
   3879 static void mc_post_reg_write ( CorePart part, ThreadId tid,
   3880                                 PtrdiffT offset, SizeT size)
   3881 {
   3882 #  define MAX_REG_WRITE_SIZE 1664
   3883    UChar area[MAX_REG_WRITE_SIZE];
   3884    tl_assert(size <= MAX_REG_WRITE_SIZE);
   3885    VG_(memset)(area, V_BITS8_DEFINED, size);
   3886    VG_(set_shadow_regs_area)( tid, 1/*shadowNo*/,offset,size, area );
   3887 #  undef MAX_REG_WRITE_SIZE
   3888 }
   3889 
   3890 static
   3891 void mc_post_reg_write_clientcall ( ThreadId tid,
   3892                                     PtrdiffT offset, SizeT size, Addr f)
   3893 {
   3894    mc_post_reg_write(/*dummy*/0, tid, offset, size);
   3895 }
   3896 
   3897 /* Look at the definedness of the guest's shadow state for
   3898    [offset, offset+size).  If any part of that is undefined, record
   3899    a parameter error.
   3900 */
   3901 static void mc_pre_reg_read ( CorePart part, ThreadId tid, Char* s,
   3902                               PtrdiffT offset, SizeT size)
   3903 {
   3904    Int   i;
   3905    Bool  bad;
   3906    UInt  otag;
   3907 
   3908    UChar area[16];
   3909    tl_assert(size <= 16);
   3910 
   3911    VG_(get_shadow_regs_area)( tid, area, 1/*shadowNo*/,offset,size );
   3912 
   3913    bad = False;
   3914    for (i = 0; i < size; i++) {
   3915       if (area[i] != V_BITS8_DEFINED) {
   3916          bad = True;
   3917          break;
   3918       }
   3919    }
   3920 
   3921    if (!bad)
   3922       return;
   3923 
   3924    /* We've found some undefinedness.  See if we can also find an
   3925       origin for it. */
   3926    otag = mb_get_origin_for_guest_offset( tid, offset, size );
   3927    MC_(record_regparam_error) ( tid, s, otag );
   3928 }
   3929 
   3930 
   3931 /*------------------------------------------------------------*/
   3932 /*--- Functions called directly from generated code:       ---*/
   3933 /*--- Load/store handlers.                                 ---*/
   3934 /*------------------------------------------------------------*/
   3935 
   3936 /* Types:  LOADV32, LOADV16, LOADV8 are:
   3937                UWord fn ( Addr a )
   3938    so they return 32-bits on 32-bit machines and 64-bits on
   3939    64-bit machines.  Addr has the same size as a host word.
   3940 
   3941    LOADV64 is always  ULong fn ( Addr a )
   3942 
   3943    Similarly for STOREV8, STOREV16, STOREV32, the supplied vbits
   3944    are a UWord, and for STOREV64 they are a ULong.
   3945 */
   3946 
   3947 /* If any part of '_a' indicated by the mask is 1, either '_a' is not
   3948    naturally '_sz/8'-aligned, or it exceeds the range covered by the
   3949    primary map.  This is all very tricky (and important!), so let's
   3950    work through the maths by hand (below), *and* assert for these
   3951    values at startup. */
   3952 #define MASK(_szInBytes) \
   3953    ( ~((0x10000UL-(_szInBytes)) | ((N_PRIMARY_MAP-1) << 16)) )
   3954 
   3955 /* MASK only exists so as to define this macro. */
   3956 #define UNALIGNED_OR_HIGH(_a,_szInBits) \
   3957    ((_a) & MASK((_szInBits>>3)))
   3958 
   3959 /* On a 32-bit machine:
   3960 
   3961    N_PRIMARY_BITS          == 16, so
   3962    N_PRIMARY_MAP           == 0x10000, so
   3963    N_PRIMARY_MAP-1         == 0xFFFF, so
   3964    (N_PRIMARY_MAP-1) << 16 == 0xFFFF0000, and so
   3965 
   3966    MASK(1) = ~ ( (0x10000 - 1) | 0xFFFF0000 )
   3967            = ~ ( 0xFFFF | 0xFFFF0000 )
   3968            = ~ 0xFFFF'FFFF
   3969            = 0
   3970 
   3971    MASK(2) = ~ ( (0x10000 - 2) | 0xFFFF0000 )
   3972            = ~ ( 0xFFFE | 0xFFFF0000 )
   3973            = ~ 0xFFFF'FFFE
   3974            = 1
   3975 
   3976    MASK(4) = ~ ( (0x10000 - 4) | 0xFFFF0000 )
   3977            = ~ ( 0xFFFC | 0xFFFF0000 )
   3978            = ~ 0xFFFF'FFFC
   3979            = 3
   3980 
   3981    MASK(8) = ~ ( (0x10000 - 8) | 0xFFFF0000 )
   3982            = ~ ( 0xFFF8 | 0xFFFF0000 )
   3983            = ~ 0xFFFF'FFF8
   3984            = 7
   3985 
   3986    Hence in the 32-bit case, "a & MASK(1/2/4/8)" is a nonzero value
   3987    precisely when a is not 1/2/4/8-bytes aligned.  And obviously, for
   3988    the 1-byte alignment case, it is always a zero value, since MASK(1)
   3989    is zero.  All as expected.
   3990 
   3991    On a 64-bit machine, it's more complex, since we're testing
   3992    simultaneously for misalignment and for the address being at or
   3993    above 32G:
   3994 
   3995    N_PRIMARY_BITS          == 19, so
   3996    N_PRIMARY_MAP           == 0x80000, so
   3997    N_PRIMARY_MAP-1         == 0x7FFFF, so
   3998    (N_PRIMARY_MAP-1) << 16 == 0x7FFFF'0000, and so
   3999 
   4000    MASK(1) = ~ ( (0x10000 - 1) | 0x7FFFF'0000 )
   4001            = ~ ( 0xFFFF | 0x7FFFF'0000 )
   4002            = ~ 0x7FFFF'FFFF
   4003            = 0xFFFF'FFF8'0000'0000
   4004 
   4005    MASK(2) = ~ ( (0x10000 - 2) | 0x7FFFF'0000 )
   4006            = ~ ( 0xFFFE | 0x7FFFF'0000 )
   4007            = ~ 0x7FFFF'FFFE
   4008            = 0xFFFF'FFF8'0000'0001
   4009 
   4010    MASK(4) = ~ ( (0x10000 - 4) | 0x7FFFF'0000 )
   4011            = ~ ( 0xFFFC | 0x7FFFF'0000 )
   4012            = ~ 0x7FFFF'FFFC
   4013            = 0xFFFF'FFF8'0000'0003
   4014 
   4015    MASK(8) = ~ ( (0x10000 - 8) | 0x7FFFF'0000 )
   4016            = ~ ( 0xFFF8 | 0x7FFFF'0000 )
   4017            = ~ 0x7FFFF'FFF8
   4018            = 0xFFFF'FFF8'0000'0007
   4019 */
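        /* Illustrative sketch only (not part of the logic here): given the
           32-bit derivation above, the expected values could be checked at
           startup with assertions along these lines, assuming
           N_PRIMARY_MAP == 0x10000:
        
              tl_assert(MASK(1) == 0x0UL);
              tl_assert(MASK(2) == 0x1UL);
              tl_assert(MASK(4) == 0x3UL);
              tl_assert(MASK(8) == 0x7UL);
        */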
   4020 
   4021 
   4022 /* ------------------------ Size = 8 ------------------------ */
   4023 
   4024 static INLINE
   4025 ULong mc_LOADV64 ( Addr a, Bool isBigEndian )
   4026 {
   4027    PROF_EVENT(200, "mc_LOADV64");
   4028 
   4029 #ifndef PERF_FAST_LOADV
   4030    return mc_LOADVn_slow( a, 64, isBigEndian );
   4031 #else
   4032    {
   4033       UWord   sm_off16, vabits16;
   4034       SecMap* sm;
   4035 
   4036       if (UNLIKELY( UNALIGNED_OR_HIGH(a,64) )) {
   4037          PROF_EVENT(201, "mc_LOADV64-slow1");
   4038          return (ULong)mc_LOADVn_slow( a, 64, isBigEndian );
   4039       }
   4040 
   4041       sm       = get_secmap_for_reading_low(a);
   4042       sm_off16 = SM_OFF_16(a);
   4043       vabits16 = ((UShort*)(sm->vabits8))[sm_off16];
   4044 
   4045       // Handle common case quickly: a is suitably aligned, is mapped, and
   4046       // addressable.
   4047       // Convert V bits from compact memory form to expanded register form.
   4048       if (LIKELY(vabits16 == VA_BITS16_DEFINED)) {
   4049          return V_BITS64_DEFINED;
   4050       } else if (LIKELY(vabits16 == VA_BITS16_UNDEFINED)) {
   4051          return V_BITS64_UNDEFINED;
   4052       } else {
   4053          /* Slow case: the 8 bytes are not all-defined or all-undefined. */
   4054          PROF_EVENT(202, "mc_LOADV64-slow2");
   4055          return mc_LOADVn_slow( a, 64, isBigEndian );
   4056       }
   4057    }
   4058 #endif
   4059 }
   4060 
   4061 VG_REGPARM(1) ULong MC_(helperc_LOADV64be) ( Addr a )
   4062 {
   4063    return mc_LOADV64(a, True);
   4064 }
   4065 VG_REGPARM(1) ULong MC_(helperc_LOADV64le) ( Addr a )
   4066 {
   4067    return mc_LOADV64(a, False);
   4068 }
   4069 
   4070 
   4071 static INLINE
   4072 void mc_STOREV64 ( Addr a, ULong vbits64, Bool isBigEndian )
   4073 {
   4074    PROF_EVENT(210, "mc_STOREV64");
   4075 
   4076 #ifndef PERF_FAST_STOREV
   4077    // XXX: this slow case seems to be marginally faster than the fast case!
   4078    // Investigate further.
   4079    mc_STOREVn_slow( a, 64, vbits64, isBigEndian );
   4080 #else
   4081    {
   4082       UWord   sm_off16, vabits16;
   4083       SecMap* sm;
   4084 
   4085       if (UNLIKELY( UNALIGNED_OR_HIGH(a,64) )) {
   4086          PROF_EVENT(211, "mc_STOREV64-slow1");
   4087          mc_STOREVn_slow( a, 64, vbits64, isBigEndian );
   4088          return;
   4089       }
   4090 
   4091       sm       = get_secmap_for_reading_low(a);
   4092       sm_off16 = SM_OFF_16(a);
   4093       vabits16 = ((UShort*)(sm->vabits8))[sm_off16];
   4094 
   4095       if (LIKELY( !is_distinguished_sm(sm) &&
   4096                           (VA_BITS16_DEFINED   == vabits16 ||
   4097                            VA_BITS16_UNDEFINED == vabits16) ))
   4098       {
   4099          /* Handle common case quickly: a is suitably aligned, */
   4100          /* is mapped, and is addressable. */
   4101          // Convert full V-bits in register to compact 2-bit form.
   4102          if (V_BITS64_DEFINED == vbits64) {
   4103             ((UShort*)(sm->vabits8))[sm_off16] = (UShort)VA_BITS16_DEFINED;
   4104          } else if (V_BITS64_UNDEFINED == vbits64) {
   4105             ((UShort*)(sm->vabits8))[sm_off16] = (UShort)VA_BITS16_UNDEFINED;
   4106          } else {
   4107             /* Slow but general case -- writing partially defined bytes. */
   4108             PROF_EVENT(212, "mc_STOREV64-slow2");
   4109             mc_STOREVn_slow( a, 64, vbits64, isBigEndian );
   4110          }
   4111       } else {
   4112          /* Slow but general case. */
   4113          PROF_EVENT(213, "mc_STOREV64-slow3");
   4114          mc_STOREVn_slow( a, 64, vbits64, isBigEndian );
   4115       }
   4116    }
   4117 #endif
   4118 }
   4119 
   4120 VG_REGPARM(1) void MC_(helperc_STOREV64be) ( Addr a, ULong vbits64 )
   4121 {
   4122    mc_STOREV64(a, vbits64, True);
   4123 }
   4124 VG_REGPARM(1) void MC_(helperc_STOREV64le) ( Addr a, ULong vbits64 )
   4125 {
   4126    mc_STOREV64(a, vbits64, False);
   4127 }
   4128 
   4129 
   4130 /* ------------------------ Size = 4 ------------------------ */
   4131 
   4132 static INLINE
   4133 UWord mc_LOADV32 ( Addr a, Bool isBigEndian )
   4134 {
   4135    PROF_EVENT(220, "mc_LOADV32");
   4136 
   4137 #ifndef PERF_FAST_LOADV
   4138    return (UWord)mc_LOADVn_slow( a, 32, isBigEndian );
   4139 #else
   4140    {
   4141       UWord   sm_off, vabits8;
   4142       SecMap* sm;
   4143 
   4144       if (UNLIKELY( UNALIGNED_OR_HIGH(a,32) )) {
   4145          PROF_EVENT(221, "mc_LOADV32-slow1");
   4146          return (UWord)mc_LOADVn_slow( a, 32, isBigEndian );
   4147       }
   4148 
   4149       sm      = get_secmap_for_reading_low(a);
   4150       sm_off  = SM_OFF(a);
   4151       vabits8 = sm->vabits8[sm_off];
   4152 
   4153       // Handle common case quickly: a is suitably aligned, is mapped, and the
   4154       // entire word32 it lives in is addressable.
   4155       // Convert V bits from compact memory form to expanded register form.
   4156       // For 64-bit platforms, set the high 32 bits of retval to 1 (undefined).
   4157       // Almost certainly not necessary, but be paranoid.
   4158       if (LIKELY(vabits8 == VA_BITS8_DEFINED)) {
   4159          return ((UWord)0xFFFFFFFF00000000ULL | (UWord)V_BITS32_DEFINED);
   4160       } else if (LIKELY(vabits8 == VA_BITS8_UNDEFINED)) {
   4161          return ((UWord)0xFFFFFFFF00000000ULL | (UWord)V_BITS32_UNDEFINED);
   4162       } else {
   4163          /* Slow case: the 4 bytes are not all-defined or all-undefined. */
   4164          PROF_EVENT(222, "mc_LOADV32-slow2");
   4165          return (UWord)mc_LOADVn_slow( a, 32, isBigEndian );
   4166       }
   4167    }
   4168 #endif
   4169 }
   4170 
   4171 VG_REGPARM(1) UWord MC_(helperc_LOADV32be) ( Addr a )
   4172 {
   4173    return mc_LOADV32(a, True);
   4174 }
   4175 VG_REGPARM(1) UWord MC_(helperc_LOADV32le) ( Addr a )
   4176 {
   4177    return mc_LOADV32(a, False);
   4178 }
   4179 
   4180 
   4181 static INLINE
   4182 void mc_STOREV32 ( Addr a, UWord vbits32, Bool isBigEndian )
   4183 {
   4184    PROF_EVENT(230, "mc_STOREV32");
   4185 
   4186 #ifndef PERF_FAST_STOREV
   4187    mc_STOREVn_slow( a, 32, (ULong)vbits32, isBigEndian );
   4188 #else
   4189    {
   4190       UWord   sm_off, vabits8;
   4191       SecMap* sm;
   4192 
   4193       if (UNLIKELY( UNALIGNED_OR_HIGH(a,32) )) {
   4194          PROF_EVENT(231, "mc_STOREV32-slow1");
   4195          mc_STOREVn_slow( a, 32, (ULong)vbits32, isBigEndian );
   4196          return;
   4197       }
   4198 
   4199       sm      = get_secmap_for_reading_low(a);
   4200       sm_off  = SM_OFF(a);
   4201       vabits8 = sm->vabits8[sm_off];
   4202 
   4203       // Cleverness:  sometimes we don't have to write the shadow memory at
   4204       // all, if we can tell that what we want to write is the same as what is
   4205       // already there.  The 64/16/8 bit cases also have cleverness at this
   4206       // point, but it works a little differently to the code below.
   4207       if (V_BITS32_DEFINED == vbits32) {
   4208          if (vabits8 == (UInt)VA_BITS8_DEFINED) {
   4209             return;
   4210          } else if (!is_distinguished_sm(sm) && VA_BITS8_UNDEFINED == vabits8) {
   4211             sm->vabits8[sm_off] = (UInt)VA_BITS8_DEFINED;
   4212          } else {
   4213             // not defined/undefined, or distinguished and changing state
   4214             PROF_EVENT(232, "mc_STOREV32-slow2");
   4215             mc_STOREVn_slow( a, 32, (ULong)vbits32, isBigEndian );
   4216          }
   4217       } else if (V_BITS32_UNDEFINED == vbits32) {
   4218          if (vabits8 == (UInt)VA_BITS8_UNDEFINED) {
   4219             return;
   4220          } else if (!is_distinguished_sm(sm) && VA_BITS8_DEFINED == vabits8) {
   4221             sm->vabits8[sm_off] = (UInt)VA_BITS8_UNDEFINED;
   4222          } else {
   4223             // not defined/undefined, or distinguished and changing state
   4224             PROF_EVENT(233, "mc_STOREV32-slow3");
   4225             mc_STOREVn_slow( a, 32, (ULong)vbits32, isBigEndian );
   4226          }
   4227       } else {
   4228          // Partially defined word
   4229          PROF_EVENT(234, "mc_STOREV32-slow4");
   4230          mc_STOREVn_slow( a, 32, (ULong)vbits32, isBigEndian );
   4231       }
   4232    }
   4233 #endif
   4234 }
   4235 
   4236 VG_REGPARM(2) void MC_(helperc_STOREV32be) ( Addr a, UWord vbits32 )
   4237 {
   4238    mc_STOREV32(a, vbits32, True);
   4239 }
   4240 VG_REGPARM(2) void MC_(helperc_STOREV32le) ( Addr a, UWord vbits32 )
   4241 {
   4242    mc_STOREV32(a, vbits32, False);
   4243 }
   4244 
   4245 
   4246 /* ------------------------ Size = 2 ------------------------ */
   4247 
   4248 static INLINE
   4249 UWord mc_LOADV16 ( Addr a, Bool isBigEndian )
   4250 {
   4251    PROF_EVENT(240, "mc_LOADV16");
   4252 
   4253 #ifndef PERF_FAST_LOADV
   4254    return (UWord)mc_LOADVn_slow( a, 16, isBigEndian );
   4255 #else
   4256    {
   4257       UWord   sm_off, vabits8;
   4258       SecMap* sm;
   4259 
   4260       if (UNLIKELY( UNALIGNED_OR_HIGH(a,16) )) {
   4261          PROF_EVENT(241, "mc_LOADV16-slow1");
   4262          return (UWord)mc_LOADVn_slow( a, 16, isBigEndian );
   4263       }
   4264 
   4265       sm      = get_secmap_for_reading_low(a);
   4266       sm_off  = SM_OFF(a);
   4267       vabits8 = sm->vabits8[sm_off];
   4268       // Handle common case quickly: a is suitably aligned, is mapped, and is
   4269       // addressable.
   4270       // Convert V bits from compact memory form to expanded register form
   4271       if      (LIKELY(vabits8 == VA_BITS8_DEFINED  )) { return V_BITS16_DEFINED;   }
   4272       else if (LIKELY(vabits8 == VA_BITS8_UNDEFINED)) { return V_BITS16_UNDEFINED; }
   4273       else {
   4274          // The 4 (yes, 4) bytes are not all-defined or all-undefined, check
   4275          // the two sub-bytes.
   4276          UChar vabits4 = extract_vabits4_from_vabits8(a, vabits8);
   4277          if      (vabits4 == VA_BITS4_DEFINED  ) { return V_BITS16_DEFINED;   }
   4278          else if (vabits4 == VA_BITS4_UNDEFINED) { return V_BITS16_UNDEFINED; }
   4279          else {
   4280             /* Slow case: the two bytes are not all-defined or all-undefined. */
   4281             PROF_EVENT(242, "mc_LOADV16-slow2");
   4282             return (UWord)mc_LOADVn_slow( a, 16, isBigEndian );
   4283          }
   4284       }
   4285    }
   4286 #endif
   4287 }
   4288 
   4289 VG_REGPARM(1) UWord MC_(helperc_LOADV16be) ( Addr a )
   4290 {
   4291    return mc_LOADV16(a, True);
   4292 }
   4293 VG_REGPARM(1) UWord MC_(helperc_LOADV16le) ( Addr a )
   4294 {
   4295    return mc_LOADV16(a, False);
   4296 }
   4297 
   4298 
   4299 static INLINE
   4300 void mc_STOREV16 ( Addr a, UWord vbits16, Bool isBigEndian )
   4301 {
   4302    PROF_EVENT(250, "mc_STOREV16");
   4303 
   4304 #ifndef PERF_FAST_STOREV
   4305    mc_STOREVn_slow( a, 16, (ULong)vbits16, isBigEndian );
   4306 #else
   4307    {
   4308       UWord   sm_off, vabits8;
   4309       SecMap* sm;
   4310 
   4311       if (UNLIKELY( UNALIGNED_OR_HIGH(a,16) )) {
   4312          PROF_EVENT(251, "mc_STOREV16-slow1");
   4313          mc_STOREVn_slow( a, 16, (ULong)vbits16, isBigEndian );
   4314          return;
   4315       }
   4316 
   4317       sm      = get_secmap_for_reading_low(a);
   4318       sm_off  = SM_OFF(a);
   4319       vabits8 = sm->vabits8[sm_off];
   4320       if (LIKELY( !is_distinguished_sm(sm) &&
   4321                           (VA_BITS8_DEFINED   == vabits8 ||
   4322                            VA_BITS8_UNDEFINED == vabits8) ))
   4323       {
   4324          /* Handle common case quickly: a is suitably aligned, */
   4325          /* is mapped, and is addressable. */
   4326          // Convert full V-bits in register to compact 2-bit form.
   4327          if (V_BITS16_DEFINED == vbits16) {
   4328             insert_vabits4_into_vabits8( a, VA_BITS4_DEFINED ,
   4329                                          &(sm->vabits8[sm_off]) );
   4330          } else if (V_BITS16_UNDEFINED == vbits16) {
   4331             insert_vabits4_into_vabits8( a, VA_BITS4_UNDEFINED,
   4332                                          &(sm->vabits8[sm_off]) );
   4333          } else {
   4334             /* Slow but general case -- writing partially defined bytes. */
   4335             PROF_EVENT(252, "mc_STOREV16-slow2");
   4336             mc_STOREVn_slow( a, 16, (ULong)vbits16, isBigEndian );
   4337          }
   4338       } else {
   4339          /* Slow but general case. */
   4340          PROF_EVENT(253, "mc_STOREV16-slow3");
   4341          mc_STOREVn_slow( a, 16, (ULong)vbits16, isBigEndian );
   4342       }
   4343    }
   4344 #endif
   4345 }
   4346 
   4347 VG_REGPARM(2) void MC_(helperc_STOREV16be) ( Addr a, UWord vbits16 )
   4348 {
   4349    mc_STOREV16(a, vbits16, True);
   4350 }
   4351 VG_REGPARM(2) void MC_(helperc_STOREV16le) ( Addr a, UWord vbits16 )
   4352 {
   4353    mc_STOREV16(a, vbits16, False);
   4354 }
   4355 
   4356 
   4357 /* ------------------------ Size = 1 ------------------------ */
   4358 /* Note: endianness is irrelevant for size == 1 */
   4359 
   4360 VG_REGPARM(1)
   4361 UWord MC_(helperc_LOADV8) ( Addr a )
   4362 {
   4363    PROF_EVENT(260, "mc_LOADV8");
   4364 
   4365 #ifndef PERF_FAST_LOADV
   4366    return (UWord)mc_LOADVn_slow( a, 8, False/*irrelevant*/ );
   4367 #else
   4368    {
   4369       UWord   sm_off, vabits8;
   4370       SecMap* sm;
   4371 
   4372       if (UNLIKELY( UNALIGNED_OR_HIGH(a,8) )) {
   4373          PROF_EVENT(261, "mc_LOADV8-slow1");
   4374          return (UWord)mc_LOADVn_slow( a, 8, False/*irrelevant*/ );
   4375       }
   4376 
   4377       sm      = get_secmap_for_reading_low(a);
   4378       sm_off  = SM_OFF(a);
   4379       vabits8 = sm->vabits8[sm_off];
   4380       // Convert V bits from compact memory form to expanded register form
   4381       // Handle common case quickly: a is mapped, and the entire
   4382       // word32 it lives in is addressable.
   4383       if      (LIKELY(vabits8 == VA_BITS8_DEFINED  )) { return V_BITS8_DEFINED;   }
   4384       else if (LIKELY(vabits8 == VA_BITS8_UNDEFINED)) { return V_BITS8_UNDEFINED; }
   4385       else {
   4386          // The 4 (yes, 4) bytes are not all-defined or all-undefined, check
   4387          // the single byte.
   4388          UChar vabits2 = extract_vabits2_from_vabits8(a, vabits8);
   4389          if      (vabits2 == VA_BITS2_DEFINED  ) { return V_BITS8_DEFINED;   }
   4390          else if (vabits2 == VA_BITS2_UNDEFINED) { return V_BITS8_UNDEFINED; }
   4391          else {
   4392             /* Slow case: the byte is not all-defined or all-undefined. */
   4393             PROF_EVENT(262, "mc_LOADV8-slow2");
   4394             return (UWord)mc_LOADVn_slow( a, 8, False/*irrelevant*/ );
   4395          }
   4396       }
   4397    }
   4398 #endif
   4399 }
   4400 
   4401 
   4402 VG_REGPARM(2)
   4403 void MC_(helperc_STOREV8) ( Addr a, UWord vbits8 )
   4404 {
   4405    PROF_EVENT(270, "mc_STOREV8");
   4406 
   4407 #ifndef PERF_FAST_STOREV
   4408    mc_STOREVn_slow( a, 8, (ULong)vbits8, False/*irrelevant*/ );
   4409 #else
   4410    {
   4411       UWord   sm_off, vabits8;
   4412       SecMap* sm;
   4413 
   4414       if (UNLIKELY( UNALIGNED_OR_HIGH(a,8) )) {
   4415          PROF_EVENT(271, "mc_STOREV8-slow1");
   4416          mc_STOREVn_slow( a, 8, (ULong)vbits8, False/*irrelevant*/ );
   4417          return;
   4418       }
   4419 
   4420       sm      = get_secmap_for_reading_low(a);
   4421       sm_off  = SM_OFF(a);
   4422       vabits8 = sm->vabits8[sm_off];
   4423       if (LIKELY
   4424             ( !is_distinguished_sm(sm) &&
   4425               ( (VA_BITS8_DEFINED == vabits8 || VA_BITS8_UNDEFINED == vabits8)
   4426              || (VA_BITS2_NOACCESS != extract_vabits2_from_vabits8(a, vabits8))
   4427               )
   4428             )
   4429          )
   4430       {
   4431          /* Handle common case quickly: a is mapped, the entire word32 it
   4432             lives in is addressable. */
   4433          // Convert full V-bits in register to compact 2-bit form.
   4434          if (V_BITS8_DEFINED == vbits8) {
   4435             insert_vabits2_into_vabits8( a, VA_BITS2_DEFINED,
   4436                                           &(sm->vabits8[sm_off]) );
   4437          } else if (V_BITS8_UNDEFINED == vbits8) {
   4438             insert_vabits2_into_vabits8( a, VA_BITS2_UNDEFINED,
   4439                                           &(sm->vabits8[sm_off]) );
   4440          } else {
   4441             /* Slow but general case -- writing partially defined bytes. */
   4442             PROF_EVENT(272, "mc_STOREV8-slow2");
   4443             mc_STOREVn_slow( a, 8, (ULong)vbits8, False/*irrelevant*/ );
   4444          }
   4445       } else {
   4446          /* Slow but general case. */
   4447          PROF_EVENT(273, "mc_STOREV8-slow3");
   4448          mc_STOREVn_slow( a, 8, (ULong)vbits8, False/*irrelevant*/ );
   4449       }
   4450    }
   4451 #endif
   4452 }
   4453 
   4454 
   4455 /*------------------------------------------------------------*/
   4456 /*--- Functions called directly from generated code:       ---*/
   4457 /*--- Value-check failure handlers.                        ---*/
   4458 /*------------------------------------------------------------*/
   4459 
   4460 /* Call these ones when an origin is available ... */
   4461 VG_REGPARM(1)
   4462 void MC_(helperc_value_check0_fail_w_o) ( UWord origin ) {
   4463    MC_(record_cond_error) ( VG_(get_running_tid)(), (UInt)origin );
   4464 }
   4465 
   4466 VG_REGPARM(1)
   4467 void MC_(helperc_value_check1_fail_w_o) ( UWord origin ) {
   4468    MC_(record_value_error) ( VG_(get_running_tid)(), 1, (UInt)origin );
   4469 }
   4470 
   4471 VG_REGPARM(1)
   4472 void MC_(helperc_value_check4_fail_w_o) ( UWord origin ) {
   4473    MC_(record_value_error) ( VG_(get_running_tid)(), 4, (UInt)origin );
   4474 }
   4475 
   4476 VG_REGPARM(1)
   4477 void MC_(helperc_value_check8_fail_w_o) ( UWord origin ) {
   4478    MC_(record_value_error) ( VG_(get_running_tid)(), 8, (UInt)origin );
   4479 }
   4480 
   4481 VG_REGPARM(2)
   4482 void MC_(helperc_value_checkN_fail_w_o) ( HWord sz, UWord origin ) {
   4483    MC_(record_value_error) ( VG_(get_running_tid)(), (Int)sz, (UInt)origin );
   4484 }
   4485 
   4486 /* ... and these when an origin isn't available. */
   4487 
   4488 VG_REGPARM(0)
   4489 void MC_(helperc_value_check0_fail_no_o) ( void ) {
   4490    MC_(record_cond_error) ( VG_(get_running_tid)(), 0/*origin*/ );
   4491 }
   4492 
   4493 VG_REGPARM(0)
   4494 void MC_(helperc_value_check1_fail_no_o) ( void ) {
   4495    MC_(record_value_error) ( VG_(get_running_tid)(), 1, 0/*origin*/ );
   4496 }
   4497 
   4498 VG_REGPARM(0)
   4499 void MC_(helperc_value_check4_fail_no_o) ( void ) {
   4500    MC_(record_value_error) ( VG_(get_running_tid)(), 4, 0/*origin*/ );
   4501 }
   4502 
   4503 VG_REGPARM(0)
   4504 void MC_(helperc_value_check8_fail_no_o) ( void ) {
   4505    MC_(record_value_error) ( VG_(get_running_tid)(), 8, 0/*origin*/ );
   4506 }
   4507 
   4508 VG_REGPARM(1)
   4509 void MC_(helperc_value_checkN_fail_no_o) ( HWord sz ) {
   4510    MC_(record_value_error) ( VG_(get_running_tid)(), (Int)sz, 0/*origin*/ );
   4511 }
   4512 
   4513 
   4514 /*------------------------------------------------------------*/
   4515 /*--- Metadata get/set functions, for client requests.     ---*/
   4516 /*------------------------------------------------------------*/
   4517 
   4518 // Nb: this expands the V+A bits out into register-form V bits, even though
   4519 // they're in memory.  This is for backward compatibility, and because it's
   4520 // probably what the user wants.
   4521 
   4522 /* Copy Vbits from/to address 'a'. Returns: 1 == OK, 2 == alignment
   4523    error [no longer used], 3 == addressing error. */
   4524 /* Nb: We used to issue various definedness/addressability errors from here,
   4525    but we took them out because they ranged from not-very-helpful to
   4526    downright annoying, and they complicated the error data structures. */
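        /* For context: this is the worker behind the VALGRIND_GET_VBITS and
           VALGRIND_SET_VBITS client requests (see memcheck.h), and is also
           reached from the gdbserver "get_vbits" monitor command below.  A
           client-side use might look roughly like this (illustrative only):
        
              int x;                              // possibly uninitialised
              unsigned char vb[sizeof(x)];
              VALGRIND_GET_VBITS(&x, vb, sizeof(x));   // a 0 bit == defined
        */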
   4527 static Int mc_get_or_set_vbits_for_client (
   4528    Addr a,
   4529    Addr vbits,
   4530    SizeT szB,
   4531    Bool setting, /* True <=> set vbits,  False <=> get vbits */
   4532    Bool is_client_request /* True <=> real user request
   4533                              False <=> internal call from gdbserver */
   4534 )
   4535 {
   4536    SizeT i;
   4537    Bool  ok;
   4538    UChar vbits8;
   4539 
   4540    /* Check that arrays are addressable before doing any getting/setting.
   4541       The vbits array is only checked for a real user request. */
   4542    for (i = 0; i < szB; i++) {
   4543       if (VA_BITS2_NOACCESS == get_vabits2(a + i) ||
   4544           (is_client_request && VA_BITS2_NOACCESS == get_vabits2(vbits + i))) {
   4545          return 3;
   4546       }
   4547    }
   4548 
   4549    /* Do the copy */
   4550    if (setting) {
   4551       /* setting */
   4552       for (i = 0; i < szB; i++) {
   4553          ok = set_vbits8(a + i, ((UChar*)vbits)[i]);
   4554          tl_assert(ok);
   4555       }
   4556    } else {
   4557       /* getting */
   4558       for (i = 0; i < szB; i++) {
   4559          ok = get_vbits8(a + i, &vbits8);
   4560          tl_assert(ok);
   4561          ((UChar*)vbits)[i] = vbits8;
   4562       }
   4563       if (is_client_request)
   4564         // The bytes in vbits[] have now been set, so mark them as such.
   4565         MC_(make_mem_defined)(vbits, szB);
   4566    }
   4567 
   4568    return 1;
   4569 }
   4570 
   4571 
   4572 /*------------------------------------------------------------*/
   4573 /*--- Detecting leaked (unreachable) malloc'd blocks.      ---*/
   4574 /*------------------------------------------------------------*/
   4575 
   4576 /* For the memory leak detector, say whether an entire 64k chunk of
   4577    address space is possibly in use, or not.  If in doubt return
   4578    True.
   4579 */
   4580 Bool MC_(is_within_valid_secondary) ( Addr a )
   4581 {
   4582    SecMap* sm = maybe_get_secmap_for ( a );
   4583    if (sm == NULL || sm == &sm_distinguished[SM_DIST_NOACCESS]
   4584        || MC_(in_ignored_range)(a)) {
   4585       /* Definitely not in use. */
   4586       return False;
   4587    } else {
   4588       return True;
   4589    }
   4590 }
   4591 
   4592 
   4593 /* For the memory leak detector, say whether or not a given word
   4594    address is to be regarded as valid. */
   4595 Bool MC_(is_valid_aligned_word) ( Addr a )
   4596 {
   4597    tl_assert(sizeof(UWord) == 4 || sizeof(UWord) == 8);
   4598    tl_assert(VG_IS_WORD_ALIGNED(a));
   4599    if (is_mem_defined( a, sizeof(UWord), NULL, NULL) == MC_Ok
   4600        && !MC_(in_ignored_range)(a)) {
   4601       return True;
   4602    } else {
   4603       return False;
   4604    }
   4605 }
   4606 
   4607 
   4608 /*------------------------------------------------------------*/
   4609 /*--- Initialisation                                       ---*/
   4610 /*------------------------------------------------------------*/
   4611 
   4612 static void init_shadow_memory ( void )
   4613 {
   4614    Int     i;
   4615    SecMap* sm;
   4616 
   4617    tl_assert(V_BIT_UNDEFINED   == 1);
   4618    tl_assert(V_BIT_DEFINED     == 0);
   4619    tl_assert(V_BITS8_UNDEFINED == 0xFF);
   4620    tl_assert(V_BITS8_DEFINED   == 0);
   4621 
   4622    /* Build the 3 distinguished secondaries */
   4623    sm = &sm_distinguished[SM_DIST_NOACCESS];
   4624    for (i = 0; i < SM_CHUNKS; i++) sm->vabits8[i] = VA_BITS8_NOACCESS;
   4625 
   4626    sm = &sm_distinguished[SM_DIST_UNDEFINED];
   4627    for (i = 0; i < SM_CHUNKS; i++) sm->vabits8[i] = VA_BITS8_UNDEFINED;
   4628 
   4629    sm = &sm_distinguished[SM_DIST_DEFINED];
   4630    for (i = 0; i < SM_CHUNKS; i++) sm->vabits8[i] = VA_BITS8_DEFINED;
   4631 
   4632    /* Set up the primary map. */
   4633    /* These entries gradually get overwritten as the used address
   4634       space expands. */
   4635    for (i = 0; i < N_PRIMARY_MAP; i++)
   4636       primary_map[i] = &sm_distinguished[SM_DIST_NOACCESS];
   4637 
   4638    /* Auxiliary primary maps */
   4639    init_auxmap_L1_L2();
   4640 
   4641    /* auxmap_size = auxmap_used = 0;
   4642       no ... these are statically initialised */
   4643 
   4644    /* Secondary V bit table */
   4645    secVBitTable = createSecVBitTable();
   4646 }
   4647 
   4648 
   4649 /*------------------------------------------------------------*/
   4650 /*--- Sanity check machinery (permanently engaged)         ---*/
   4651 /*------------------------------------------------------------*/
   4652 
   4653 static Bool mc_cheap_sanity_check ( void )
   4654 {
   4655    n_sanity_cheap++;
   4656    PROF_EVENT(490, "cheap_sanity_check");
   4657    /* Check for sane operating level */
   4658    if (MC_(clo_mc_level) < 1 || MC_(clo_mc_level) > 3)
   4659       return False;
   4660    /* nothing else useful we can rapidly check */
   4661    return True;
   4662 }
   4663 
   4664 static Bool mc_expensive_sanity_check ( void )
   4665 {
   4666    Int     i;
   4667    Word    n_secmaps_found;
   4668    SecMap* sm;
   4669    HChar*  errmsg;
   4670    Bool    bad = False;
   4671 
   4672    if (0) VG_(printf)("expensive sanity check\n");
   4673    if (0) return True;
   4674 
   4675    n_sanity_expensive++;
   4676    PROF_EVENT(491, "expensive_sanity_check");
   4677 
   4678    /* Check for sane operating level */
   4679    if (MC_(clo_mc_level) < 1 || MC_(clo_mc_level) > 3)
   4680       return False;
   4681 
   4682    /* Check that the 3 distinguished SMs are still as they should be. */
   4683 
   4684    /* Check noaccess DSM. */
   4685    sm = &sm_distinguished[SM_DIST_NOACCESS];
   4686    for (i = 0; i < SM_CHUNKS; i++)
   4687       if (sm->vabits8[i] != VA_BITS8_NOACCESS)
   4688          bad = True;
   4689 
   4690    /* Check undefined DSM. */
   4691    sm = &sm_distinguished[SM_DIST_UNDEFINED];
   4692    for (i = 0; i < SM_CHUNKS; i++)
   4693       if (sm->vabits8[i] != VA_BITS8_UNDEFINED)
   4694          bad = True;
   4695 
   4696    /* Check defined DSM. */
   4697    sm = &sm_distinguished[SM_DIST_DEFINED];
   4698    for (i = 0; i < SM_CHUNKS; i++)
   4699       if (sm->vabits8[i] != VA_BITS8_DEFINED)
   4700          bad = True;
   4701 
   4702    if (bad) {
   4703       VG_(printf)("memcheck expensive sanity: "
   4704                   "distinguished_secondaries have changed\n");
   4705       return False;
   4706    }
   4707 
   4708    /* If we're not checking for undefined value errors, the secondary V bit
   4709       table should be empty. */
   4710    if (MC_(clo_mc_level) == 1) {
   4711       if (0 != VG_(OSetGen_Size)(secVBitTable))
   4712          return False;
   4713    }
   4714 
   4715    /* check the auxiliary maps, very thoroughly */
   4716    n_secmaps_found = 0;
   4717    errmsg = check_auxmap_L1_L2_sanity( &n_secmaps_found );
   4718    if (errmsg) {
   4719       VG_(printf)("memcheck expensive sanity, auxmaps:\n\t%s", errmsg);
   4720       return False;
   4721    }
   4722 
   4723    /* n_secmaps_found is now the number referred to by the auxiliary
   4724       primary map.  Now add on the ones referred to by the main
   4725       primary map. */
   4726    for (i = 0; i < N_PRIMARY_MAP; i++) {
   4727       if (primary_map[i] == NULL) {
   4728          bad = True;
   4729       } else {
   4730          if (!is_distinguished_sm(primary_map[i]))
   4731             n_secmaps_found++;
   4732       }
   4733    }
   4734 
   4735    /* check that the number of secmaps issued matches the number that
   4736       are reachable (iow, no secmap leaks) */
   4737    if (n_secmaps_found != (n_issued_SMs - n_deissued_SMs))
   4738       bad = True;
   4739 
   4740    if (bad) {
   4741       VG_(printf)("memcheck expensive sanity: "
   4742                   "apparent secmap leakage\n");
   4743       return False;
   4744    }
   4745 
   4746    if (bad) {
   4747       VG_(printf)("memcheck expensive sanity: "
   4748                   "auxmap covers wrong address space\n");
   4749       return False;
   4750    }
   4751 
   4752    /* there is only one pointer to each secmap (expensive) */
   4753 
   4754    return True;
   4755 }
   4756 
   4757 /*------------------------------------------------------------*/
   4758 /*--- Command line args                                    ---*/
   4759 /*------------------------------------------------------------*/
   4760 
   4761 Bool          MC_(clo_partial_loads_ok)       = False;
   4762 Long          MC_(clo_freelist_vol)           = 20*1000*1000LL;
   4763 Long          MC_(clo_freelist_big_blocks)    =  1*1000*1000LL;
   4764 LeakCheckMode MC_(clo_leak_check)             = LC_Summary;
   4765 VgRes         MC_(clo_leak_resolution)        = Vg_HighRes;
   4766 Bool          MC_(clo_show_reachable)         = False;
   4767 Bool          MC_(clo_show_possibly_lost)     = True;
   4768 Bool          MC_(clo_workaround_gcc296_bugs) = False;
   4769 Int           MC_(clo_malloc_fill)            = -1;
   4770 Int           MC_(clo_free_fill)              = -1;
   4771 Int           MC_(clo_mc_level)               = 2;
   4772 const char*   MC_(clo_summary_file)           = NULL;
   4773 
   4774 
   4775 static Bool mc_process_cmd_line_options(Char* arg)
   4776 {
   4777    Char* tmp_str;
   4778 
   4779    tl_assert( MC_(clo_mc_level) >= 1 && MC_(clo_mc_level) <= 3 );
   4780 
   4781    /* Set MC_(clo_mc_level):
   4782          1 = A bit tracking only
   4783          2 = A and V bit tracking, but no V bit origins
   4784          3 = A and V bit tracking, and V bit origins
   4785 
   4786       Do this by inspecting --undef-value-errors= and
   4787       --track-origins=.  Reject the case --undef-value-errors=no
   4788       --track-origins=yes as meaningless.
   4789    */
   4790    if (0 == VG_(strcmp)(arg, "--undef-value-errors=no")) {
   4791       if (MC_(clo_mc_level) == 3) {
   4792          goto bad_level;
   4793       } else {
   4794          MC_(clo_mc_level) = 1;
   4795          return True;
   4796       }
   4797    }
   4798    if (0 == VG_(strcmp)(arg, "--undef-value-errors=yes")) {
   4799       if (MC_(clo_mc_level) == 1)
   4800          MC_(clo_mc_level) = 2;
   4801       return True;
   4802    }
   4803    if (0 == VG_(strcmp)(arg, "--track-origins=no")) {
   4804       if (MC_(clo_mc_level) == 3)
   4805          MC_(clo_mc_level) = 2;
   4806       return True;
   4807    }
   4808    if (0 == VG_(strcmp)(arg, "--track-origins=yes")) {
   4809       if (MC_(clo_mc_level) == 1) {
   4810          goto bad_level;
   4811       } else {
   4812          MC_(clo_mc_level) = 3;
   4813          return True;
   4814       }
   4815    }
   4816 
   4817    if VG_BOOL_CLO(arg, "--partial-loads-ok", MC_(clo_partial_loads_ok)) {}
   4818    else if VG_BOOL_CLO(arg, "--show-reachable",   MC_(clo_show_reachable))   {}
   4819    else if VG_BOOL_CLO(arg, "--show-possibly-lost",
   4820                                             MC_(clo_show_possibly_lost))     {}
   4821    else if VG_BOOL_CLO(arg, "--workaround-gcc296-bugs",
   4822                                             MC_(clo_workaround_gcc296_bugs)) {}
   4823 
   4824    else if VG_BINT_CLO(arg, "--freelist-vol",  MC_(clo_freelist_vol),
   4825                                                0, 10*1000*1000*1000LL) {}
   4826 
   4827    else if VG_BINT_CLO(arg, "--freelist-big-blocks",
   4828                        MC_(clo_freelist_big_blocks),
   4829                        0, 10*1000*1000*1000LL) {}
   4830 
   4831    else if VG_XACT_CLO(arg, "--leak-check=no",
   4832                             MC_(clo_leak_check), LC_Off) {}
   4833    else if VG_XACT_CLO(arg, "--leak-check=summary",
   4834                             MC_(clo_leak_check), LC_Summary) {}
   4835    else if VG_XACT_CLO(arg, "--leak-check=yes",
   4836                             MC_(clo_leak_check), LC_Full) {}
   4837    else if VG_XACT_CLO(arg, "--leak-check=full",
   4838                             MC_(clo_leak_check), LC_Full) {}
   4839 
   4840    else if VG_XACT_CLO(arg, "--leak-resolution=low",
   4841                             MC_(clo_leak_resolution), Vg_LowRes) {}
   4842    else if VG_XACT_CLO(arg, "--leak-resolution=med",
   4843                             MC_(clo_leak_resolution), Vg_MedRes) {}
   4844    else if VG_XACT_CLO(arg, "--leak-resolution=high",
   4845                             MC_(clo_leak_resolution), Vg_HighRes) {}
   4846 
   4847    else if VG_STR_CLO(arg, "--summary-file", tmp_str) {
   4848       MC_(clo_summary_file) = VG_(strdup)("clo_summary_file", tmp_str);
   4849    }
   4850    else if VG_STR_CLO(arg, "--ignore-ranges", tmp_str) {
   4851       Int  i;
   4852       Bool ok  = parse_ignore_ranges(tmp_str);
   4853       if (!ok)
   4854         return False;
   4855       tl_assert(ignoreRanges.used >= 0);
   4856       tl_assert(ignoreRanges.used < M_IGNORE_RANGES);
   4857       for (i = 0; i < ignoreRanges.used; i++) {
   4858          Addr s = ignoreRanges.start[i];
   4859          Addr e = ignoreRanges.end[i];
   4860          Addr limit = 0x4000000; /* 64M - entirely arbitrary limit */
   4861          if (e <= s) {
   4862             VG_(message)(Vg_DebugMsg,
   4863                "ERROR: --ignore-ranges: end <= start in range:\n");
   4864             VG_(message)(Vg_DebugMsg,
   4865                "       0x%lx-0x%lx\n", s, e);
   4866             return False;
   4867          }
   4868          if (e - s > limit) {
   4869             VG_(message)(Vg_DebugMsg,
   4870                "ERROR: --ignore-ranges: suspiciously large range:\n");
   4871             VG_(message)(Vg_DebugMsg,
   4872                "       0x%lx-0x%lx (size %ld)\n", s, e, (UWord)(e-s));
   4873             return False;
   4874          }
   4875       }
   4876    }
   4877 
   4878    else if VG_BHEX_CLO(arg, "--malloc-fill", MC_(clo_malloc_fill), 0x00,0xFF) {}
   4879    else if VG_BHEX_CLO(arg, "--free-fill",   MC_(clo_free_fill),   0x00,0xFF) {}
   4880 
   4881    else
   4882       return VG_(replacement_malloc_process_cmd_line_option)(arg);
   4883 
   4884    return True;
   4885 
   4886 
   4887   bad_level:
   4888    VG_(fmsg_bad_option)(arg,
   4889       "--track-origins=yes has no effect when --undef-value-errors=no.\n");
   4890 }
   4891 
   4892 static void mc_print_usage(void)
   4893 {
   4894    VG_(printf)(
   4895 "    --leak-check=no|summary|full     search for memory leaks at exit?  [summary]\n"
   4896 "    --leak-resolution=low|med|high   differentiation of leak stack traces [high]\n"
   4897 "    --show-reachable=no|yes          show reachable blocks in leak check? [no]\n"
   4898 "    --show-possibly-lost=no|yes      show possibly lost blocks in leak check?\n"
   4899 "                                     [yes]\n"
   4900 "    --undef-value-errors=no|yes      check for undefined value errors [yes]\n"
   4901 "    --track-origins=no|yes           show origins of undefined values? [no]\n"
   4902 "    --partial-loads-ok=no|yes        too hard to explain here; see manual [no]\n"
   4903 "    --freelist-vol=<number>          volume of freed blocks queue      [20000000]\n"
   4904 "    --freelist-big-blocks=<number>   freed blocks with size >= this are released first [1000000]\n"
   4905 "    --workaround-gcc296-bugs=no|yes  self explanatory [no]\n"
   4906 "    --ignore-ranges=0xPP-0xQQ[,0xRR-0xSS]   assume given addresses are OK\n"
   4907 "    --malloc-fill=<hexnumber>        fill malloc'd areas with given value\n"
   4908 "    --free-fill=<hexnumber>          fill free'd areas with given value\n"
   4909    );
   4910 }
   4911 
   4912 static void mc_print_debug_usage(void)
   4913 {
   4914    VG_(printf)(
   4915 "    (none)\n"
   4916    );
   4917 }
   4918 
   4919 
   4920 /*------------------------------------------------------------*/
   4921 /*--- Client blocks                                        ---*/
   4922 /*------------------------------------------------------------*/
   4923 
   4924 /* Client block management:
   4925 
   4926    This is managed as an expanding array of client block descriptors.
   4927    Indices of live descriptors are issued to the client, so it can ask
   4928    to free them later.  Therefore we cannot slide live entries down
   4929    over dead ones.  Instead we must use free/inuse flags and scan for
   4930    an empty slot at allocation time.  This in turn means allocation is
   4931    relatively expensive, so we hope this does not happen too often.
   4932 
   4933    An unused block has start == size == 0
   4934 */
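        /* See alloc_client_block() below for the free-slot scan and the
           array-doubling growth this comment describes. */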
   4935 
   4936 /* type CGenBlock is defined in mc_include.h */
   4937 
   4938 /* This subsystem is self-initialising. */
   4939 static UWord      cgb_size = 0;
   4940 static UWord      cgb_used = 0;
   4941 static CGenBlock* cgbs     = NULL;
   4942 
   4943 /* Stats for this subsystem. */
   4944 static ULong cgb_used_MAX = 0;   /* Max in use. */
   4945 static ULong cgb_allocs   = 0;   /* Number of allocs. */
   4946 static ULong cgb_discards = 0;   /* Number of discards. */
   4947 static ULong cgb_search   = 0;   /* Number of searches. */
   4948 
   4949 
   4950 /* Get access to the client block array. */
   4951 void MC_(get_ClientBlock_array)( /*OUT*/CGenBlock** blocks,
   4952                                  /*OUT*/UWord* nBlocks )
   4953 {
   4954    *blocks  = cgbs;
   4955    *nBlocks = cgb_used;
   4956 }
   4957 
   4958 
   4959 static
   4960 Int alloc_client_block ( void )
   4961 {
   4962    UWord      i, sz_new;
   4963    CGenBlock* cgbs_new;
   4964 
   4965    cgb_allocs++;
   4966 
   4967    for (i = 0; i < cgb_used; i++) {
   4968       cgb_search++;
   4969       if (cgbs[i].start == 0 && cgbs[i].size == 0)
   4970          return i;
   4971    }
   4972 
   4973    /* Not found.  Try to allocate one at the end. */
   4974    if (cgb_used < cgb_size) {
   4975       cgb_used++;
   4976       return cgb_used-1;
   4977    }
   4978 
   4979    /* Ok, we have to allocate a new one. */
   4980    tl_assert(cgb_used == cgb_size);
   4981    sz_new = (cgbs == NULL) ? 10 : (2 * cgb_size);
   4982 
   4983    cgbs_new = VG_(malloc)( "mc.acb.1", sz_new * sizeof(CGenBlock) );
   4984    for (i = 0; i < cgb_used; i++)
   4985       cgbs_new[i] = cgbs[i];
   4986 
   4987    if (cgbs != NULL)
   4988       VG_(free)( cgbs );
   4989    cgbs = cgbs_new;
   4990 
   4991    cgb_size = sz_new;
   4992    cgb_used++;
   4993    if (cgb_used > cgb_used_MAX)
   4994       cgb_used_MAX = cgb_used;
   4995    return cgb_used-1;
   4996 }
   4997 
   4998 
   4999 static void show_client_block_stats ( void )
   5000 {
   5001    VG_(message)(Vg_DebugMsg,
   5002       "general CBs: %llu allocs, %llu discards, %llu maxinuse, %llu search\n",
   5003       cgb_allocs, cgb_discards, cgb_used_MAX, cgb_search
   5004    );
   5005 }
   5006 static void print_monitor_help ( void )
   5007 {
   5008    VG_(gdb_printf)
   5009       (
   5010 "\n"
   5011 "memcheck monitor commands:\n"
   5012 "  get_vbits <addr> [<len>]\n"
   5013 "        returns validity bits for <len> (or 1) bytes at <addr>\n"
   5014 "            bit values 0 = valid, 1 = invalid, __ = unaddressable byte\n"
   5015 "        Example: get_vbits 0x8049c78 10\n"
   5016 "  make_memory [noaccess|undefined\n"
   5017 "                     |defined|Definedifaddressable] <addr> [<len>]\n"
   5018 "        mark <len> (or 1) bytes at <addr> with the given accessibility\n"
   5019 "  check_memory [addressable|defined] <addr> [<len>]\n"
   5020 "        check that <len> (or 1) bytes at <addr> have the given accessibility\n"
   5021 "            and outputs a description of <addr>\n"
   5022 "  leak_check [full*|summary] [reachable|possibleleak*|definiteleak]\n"
   5023 "                [increased*|changed|any]\n"
   5024 "            * = defaults\n"
   5025 "        Examples: leak_check\n"
   5026 "                  leak_check summary any\n"
   5027 "\n");
   5028 }
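         /* For reference: these commands are typed at a gdb attached through
            vgdb.  An illustrative session (addresses are just examples),
            following the help text above:

               (gdb) monitor get_vbits 0x8049c78 16
               (gdb) monitor make_memory defined 0x8049c78 16
               (gdb) monitor check_memory defined 0x8049c78 16
               (gdb) monitor leak_check summary any
         */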
   5029 
   5030 /* return True if request recognised, False otherwise */
   5031 static Bool handle_gdb_monitor_command (ThreadId tid, Char *req)
   5032 {
   5033    Char* wcmd;
    5034    Char s[VG_(strlen(req)) + 1]; /* copy for strtok_r; +1 for trailing NUL */
   5035    Char *ssaveptr;
   5036 
   5037    VG_(strcpy) (s, req);
   5038 
   5039    wcmd = VG_(strtok_r) (s, " ", &ssaveptr);
   5040    /* NB: if possible, avoid introducing a new command below which
   5041       starts with the same first letter(s) as an already existing
   5042       command. This ensures a shorter abbreviation for the user. */
   5043    switch (VG_(keyword_id)
   5044            ("help get_vbits leak_check make_memory check_memory",
   5045             wcmd, kwd_report_duplicated_matches)) {
   5046    case -2: /* multiple matches */
   5047       return True;
   5048    case -1: /* not found */
   5049       return False;
   5050    case  0: /* help */
   5051       print_monitor_help();
   5052       return True;
   5053    case  1: { /* get_vbits */
   5054       Addr address;
   5055       SizeT szB = 1;
   5056       VG_(strtok_get_address_and_size) (&address, &szB, &ssaveptr);
   5057       if (szB != 0) {
   5058          UChar vbits;
   5059          Int i;
   5060          Int unaddressable = 0;
   5061          for (i = 0; i < szB; i++) {
   5062             Int res = mc_get_or_set_vbits_for_client
   5063                (address+i, (Addr) &vbits, 1,
   5064                 False, /* get them */
   5065                 False  /* is client request */ );
   5066             if ((i % 32) == 0 && i != 0)
   5067                VG_(gdb_printf) ("\n");
   5068             else if ((i % 4) == 0 && i != 0)
   5069                VG_(gdb_printf) (" ");
   5070             if (res == 1) {
   5071                VG_(gdb_printf) ("%02x", vbits);
   5072             } else {
   5073                tl_assert(3 == res);
   5074                unaddressable++;
   5075                VG_(gdb_printf) ("__");
   5076             }
   5077          }
   5078          if ((i % 80) != 0)
   5079             VG_(gdb_printf) ("\n");
   5080          if (unaddressable) {
   5081             VG_(gdb_printf)
   5082                ("Address %p len %ld has %d bytes unaddressable\n",
   5083                 (void *)address, szB, unaddressable);
   5084          }
   5085       }
   5086       return True;
   5087    }
   5088    case  2: { /* leak_check */
   5089       Int err = 0;
   5090       LeakCheckParams lcp;
   5091       Char* kw;
   5092 
   5093       lcp.mode               = LC_Full;
   5094       lcp.show_reachable     = False;
   5095       lcp.show_possibly_lost = True;
   5096       lcp.deltamode          = LCD_Increased;
   5097       lcp.requested_by_monitor_command = True;
   5098 
   5099       for (kw = VG_(strtok_r) (NULL, " ", &ssaveptr);
   5100            kw != NULL;
   5101            kw = VG_(strtok_r) (NULL, " ", &ssaveptr)) {
   5102          switch (VG_(keyword_id)
   5103                  ("full summary "
   5104                   "reachable possibleleak definiteleak "
   5105                   "increased changed any",
   5106                   kw, kwd_report_all)) {
   5107          case -2: err++; break;
   5108          case -1: err++; break;
   5109          case  0: /* full */
   5110             lcp.mode = LC_Full; break;
   5111          case  1: /* summary */
   5112             lcp.mode = LC_Summary; break;
   5113          case  2: /* reachable */
   5114             lcp.show_reachable = True;
   5115             lcp.show_possibly_lost = True; break;
   5116          case  3: /* possibleleak */
   5117             lcp.show_reachable = False;
   5118             lcp.show_possibly_lost = True; break;
   5119          case  4: /* definiteleak */
   5120             lcp.show_reachable = False;
   5121             lcp.show_possibly_lost = False; break;
   5122          case  5: /* increased */
   5123             lcp.deltamode = LCD_Increased; break;
   5124          case  6: /* changed */
   5125             lcp.deltamode = LCD_Changed; break;
   5126          case  7: /* any */
   5127             lcp.deltamode = LCD_Any; break;
   5128          default:
   5129             tl_assert (0);
   5130          }
   5131       }
   5132       if (!err)
   5133          MC_(detect_memory_leaks)(tid, lcp);
   5134       return True;
   5135    }
   5136 
   5137    case  3: { /* make_memory */
   5138       Addr address;
   5139       SizeT szB = 1;
   5140       int kwdid = VG_(keyword_id)
   5141          ("noaccess undefined defined Definedifaddressable",
   5142           VG_(strtok_r) (NULL, " ", &ssaveptr), kwd_report_all);
   5143       VG_(strtok_get_address_and_size) (&address, &szB, &ssaveptr);
   5144       if (address == (Addr) 0 && szB == 0) return True;
   5145       switch (kwdid) {
   5146       case -2: break;
   5147       case -1: break;
   5148       case  0: MC_(make_mem_noaccess) (address, szB); break;
   5149       case  1: make_mem_undefined_w_tid_and_okind ( address, szB, tid,
   5150                                                     MC_OKIND_USER ); break;
   5151       case  2: MC_(make_mem_defined) ( address, szB ); break;
    5152       case  3: make_mem_defined_if_addressable ( address, szB ); break;
   5153       default: tl_assert(0);
   5154       }
   5155       return True;
   5156    }
   5157 
   5158    case  4: { /* check_memory */
   5159       Addr address;
   5160       SizeT szB = 1;
   5161       Addr bad_addr;
   5162       UInt okind;
   5163       char* src;
   5164       UInt otag;
   5165       UInt ecu;
   5166       ExeContext* origin_ec;
   5167       MC_ReadResult res;
   5168 
   5169       int kwdid = VG_(keyword_id)
   5170          ("addressable defined",
   5171           VG_(strtok_r) (NULL, " ", &ssaveptr), kwd_report_all);
   5172       VG_(strtok_get_address_and_size) (&address, &szB, &ssaveptr);
   5173       if (address == (Addr) 0 && szB == 0) return True;
   5174       switch (kwdid) {
   5175       case -2: break;
   5176       case -1: break;
   5177       case  0:
   5178          if (is_mem_addressable ( address, szB, &bad_addr ))
   5179             VG_(gdb_printf) ("Address %p len %ld addressable\n",
   5180                              (void *)address, szB);
   5181          else
   5182             VG_(gdb_printf)
   5183                ("Address %p len %ld not addressable:\nbad address %p\n",
   5184                 (void *)address, szB, (void *) bad_addr);
   5185          MC_(pp_describe_addr) (address);
   5186          break;
   5187       case  1: res = is_mem_defined ( address, szB, &bad_addr, &otag );
   5188          if (MC_AddrErr == res)
   5189             VG_(gdb_printf)
   5190                ("Address %p len %ld not addressable:\nbad address %p\n",
   5191                 (void *)address, szB, (void *) bad_addr);
   5192          else if (MC_ValueErr == res) {
   5193             okind = otag & 3;
   5194             switch (okind) {
   5195             case MC_OKIND_STACK:
   5196                src = " was created by a stack allocation"; break;
   5197             case MC_OKIND_HEAP:
   5198                src = " was created by a heap allocation"; break;
   5199             case MC_OKIND_USER:
   5200                src = " was created by a client request"; break;
   5201             case MC_OKIND_UNKNOWN:
   5202                src = ""; break;
   5203             default: tl_assert(0);
   5204             }
   5205             VG_(gdb_printf)
   5206                ("Address %p len %ld not defined:\n"
   5207                 "Uninitialised value at %p%s\n",
   5208                 (void *)address, szB, (void *) bad_addr, src);
   5209             ecu = otag & ~3;
   5210             if (VG_(is_plausible_ECU)(ecu)) {
   5211                origin_ec = VG_(get_ExeContext_from_ECU)( ecu );
   5212                VG_(pp_ExeContext)( origin_ec );
   5213             }
   5214          }
   5215          else
   5216             VG_(gdb_printf) ("Address %p len %ld defined\n",
   5217                              (void *)address, szB);
   5218          MC_(pp_describe_addr) (address);
   5219          break;
   5220       default: tl_assert(0);
   5221       }
   5222       return True;
   5223    }
   5224 
   5225    default:
   5226       tl_assert(0);
   5227       return False;
   5228    }
   5229 }
   5230 
   5231 /*------------------------------------------------------------*/
   5232 /*--- Client requests                                      ---*/
   5233 /*------------------------------------------------------------*/
   5234 
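         /* These requests arrive from client code via the macros in memcheck.h
            and valgrind.h: arg[0] is the request code, arg[1..] its operands,
            and *ret is handed back to the client.  An illustrative (hedged)
            client-side fragment, assuming the usual installed header path:

               #include <valgrind/memcheck.h>
               ...
               VALGRIND_MAKE_MEM_NOACCESS(buf, len);
               ...
               VALGRIND_MAKE_MEM_DEFINED(buf, len);
               if (VALGRIND_CHECK_MEM_IS_DEFINED(buf, len) != 0)
                  ...  // nonzero: lowest offending address, as computed below
         */
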
   5235 static Bool mc_handle_client_request ( ThreadId tid, UWord* arg, UWord* ret )
   5236 {
   5237    Int   i;
   5238    Bool  ok;
   5239    Addr  bad_addr;
   5240 
   5241    if (!VG_IS_TOOL_USERREQ('M','C',arg[0])
   5242        && VG_USERREQ__MALLOCLIKE_BLOCK != arg[0]
   5243        && VG_USERREQ__RESIZEINPLACE_BLOCK != arg[0]
   5244        && VG_USERREQ__FREELIKE_BLOCK   != arg[0]
   5245        && VG_USERREQ__CREATE_MEMPOOL   != arg[0]
   5246        && VG_USERREQ__DESTROY_MEMPOOL  != arg[0]
   5247        && VG_USERREQ__MEMPOOL_ALLOC    != arg[0]
   5248        && VG_USERREQ__MEMPOOL_FREE     != arg[0]
   5249        && VG_USERREQ__MEMPOOL_TRIM     != arg[0]
   5250        && VG_USERREQ__MOVE_MEMPOOL     != arg[0]
   5251        && VG_USERREQ__MEMPOOL_CHANGE   != arg[0]
   5252        && VG_USERREQ__MEMPOOL_EXISTS   != arg[0]
   5253        && VG_USERREQ__GDB_MONITOR_COMMAND   != arg[0])
   5254       return False;
   5255 
   5256    switch (arg[0]) {
   5257       case VG_USERREQ__CHECK_MEM_IS_ADDRESSABLE:
   5258          ok = is_mem_addressable ( arg[1], arg[2], &bad_addr );
   5259          if (!ok)
   5260             MC_(record_user_error) ( tid, bad_addr, /*isAddrErr*/True, 0 );
   5261          *ret = ok ? (UWord)NULL : bad_addr;
   5262          break;
   5263 
   5264       case VG_USERREQ__CHECK_MEM_IS_DEFINED: {
   5265          Bool errorV    = False;
   5266          Addr bad_addrV = 0;
   5267          UInt otagV     = 0;
   5268          Bool errorA    = False;
   5269          Addr bad_addrA = 0;
   5270          is_mem_defined_comprehensive(
   5271             arg[1], arg[2],
   5272             &errorV, &bad_addrV, &otagV, &errorA, &bad_addrA
   5273          );
   5274          if (errorV) {
   5275             MC_(record_user_error) ( tid, bad_addrV,
   5276                                      /*isAddrErr*/False, otagV );
   5277          }
   5278          if (errorA) {
   5279             MC_(record_user_error) ( tid, bad_addrA,
   5280                                      /*isAddrErr*/True, 0 );
   5281          }
   5282          /* Return the lower of the two erring addresses, if any. */
   5283          *ret = 0;
   5284          if (errorV && !errorA) {
   5285             *ret = bad_addrV;
   5286          }
   5287          if (!errorV && errorA) {
   5288             *ret = bad_addrA;
   5289          }
   5290          if (errorV && errorA) {
   5291             *ret = bad_addrV < bad_addrA ? bad_addrV : bad_addrA;
   5292          }
   5293          break;
   5294       }
   5295 
   5296       case VG_USERREQ__DO_LEAK_CHECK: {
   5297          LeakCheckParams lcp;
   5298 
   5299          if (arg[1] == 0)
   5300             lcp.mode = LC_Full;
   5301          else if (arg[1] == 1)
   5302             lcp.mode = LC_Summary;
   5303          else {
   5304             VG_(message)(Vg_UserMsg,
   5305                          "Warning: unknown memcheck leak search mode\n");
   5306             lcp.mode = LC_Full;
   5307          }
   5308 
   5309          lcp.show_reachable = MC_(clo_show_reachable);
   5310          lcp.show_possibly_lost = MC_(clo_show_possibly_lost);
   5311 
   5312          if (arg[2] == 0)
   5313             lcp.deltamode = LCD_Any;
   5314          else if (arg[2] == 1)
   5315             lcp.deltamode = LCD_Increased;
   5316          else if (arg[2] == 2)
   5317             lcp.deltamode = LCD_Changed;
   5318          else {
   5319             VG_(message)
   5320                (Vg_UserMsg,
   5321                 "Warning: unknown memcheck leak search deltamode\n");
   5322             lcp.deltamode = LCD_Any;
   5323          }
   5324          lcp.requested_by_monitor_command = False;
   5325 
   5326          MC_(detect_memory_leaks)(tid, lcp);
   5327          *ret = 0; /* return value is meaningless */
   5328          break;
   5329       }
   5330 
   5331       case VG_USERREQ__MAKE_MEM_NOACCESS:
   5332          MC_(make_mem_noaccess) ( arg[1], arg[2] );
   5333          *ret = -1;
   5334          break;
   5335 
   5336       case VG_USERREQ__MAKE_MEM_UNDEFINED:
   5337          make_mem_undefined_w_tid_and_okind ( arg[1], arg[2], tid,
   5338                                               MC_OKIND_USER );
   5339          *ret = -1;
   5340          break;
   5341 
   5342       case VG_USERREQ__MAKE_MEM_DEFINED:
   5343          MC_(make_mem_defined) ( arg[1], arg[2] );
   5344          *ret = -1;
   5345          break;
   5346 
   5347       case VG_USERREQ__MAKE_MEM_DEFINED_IF_ADDRESSABLE:
   5348          make_mem_defined_if_addressable ( arg[1], arg[2] );
   5349          *ret = -1;
   5350          break;
   5351 
   5352       case VG_USERREQ__CREATE_BLOCK: /* describe a block */
   5353          if (arg[1] != 0 && arg[2] != 0) {
   5354             i = alloc_client_block();
   5355             /* VG_(printf)("allocated %d %p\n", i, cgbs); */
   5356             cgbs[i].start = arg[1];
   5357             cgbs[i].size  = arg[2];
   5358             cgbs[i].desc  = VG_(strdup)("mc.mhcr.1", (Char *)arg[3]);
   5359             cgbs[i].where = VG_(record_ExeContext) ( tid, 0/*first_ip_delta*/ );
   5360             *ret = i;
   5361          } else
   5362             *ret = -1;
   5363          break;
   5364 
   5365       case VG_USERREQ__DISCARD: /* discard */
   5366          if (cgbs == NULL
   5367              || arg[2] >= cgb_used ||
   5368              (cgbs[arg[2]].start == 0 && cgbs[arg[2]].size == 0)) {
   5369             *ret = 1;
   5370          } else {
   5371             tl_assert(arg[2] >= 0 && arg[2] < cgb_used);
   5372             cgbs[arg[2]].start = cgbs[arg[2]].size = 0;
   5373             VG_(free)(cgbs[arg[2]].desc);
   5374             cgb_discards++;
   5375             *ret = 0;
   5376          }
   5377          break;
   5378 
   5379       case VG_USERREQ__GET_VBITS:
   5380          *ret = mc_get_or_set_vbits_for_client
   5381                    ( arg[1], arg[2], arg[3],
   5382                      False /* get them */,
   5383                      True /* is client request */ );
   5384          break;
   5385 
   5386       case VG_USERREQ__SET_VBITS:
   5387          *ret = mc_get_or_set_vbits_for_client
   5388                    ( arg[1], arg[2], arg[3],
   5389                      True /* set them */,
   5390                      True /* is client request */ );
   5391          break;
   5392 
   5393       case VG_USERREQ__COUNT_LEAKS: { /* count leaked bytes */
   5394          UWord** argp = (UWord**)arg;
   5395          // MC_(bytes_leaked) et al were set by the last leak check (or zero
   5396          // if no prior leak checks performed).
   5397          *argp[1] = MC_(bytes_leaked) + MC_(bytes_indirect);
   5398          *argp[2] = MC_(bytes_dubious);
   5399          *argp[3] = MC_(bytes_reachable);
   5400          *argp[4] = MC_(bytes_suppressed);
   5401          // there is no argp[5]
   5402          //*argp[5] = MC_(bytes_indirect);
   5403          // XXX need to make *argp[1-4] defined;  currently done in the
   5404          // VALGRIND_COUNT_LEAKS_MACRO by initialising them to zero.
   5405          *ret = 0;
   5406          return True;
   5407       }
   5408       case VG_USERREQ__COUNT_LEAK_BLOCKS: { /* count leaked blocks */
   5409          UWord** argp = (UWord**)arg;
   5410          // MC_(blocks_leaked) et al were set by the last leak check (or zero
   5411          // if no prior leak checks performed).
   5412          *argp[1] = MC_(blocks_leaked) + MC_(blocks_indirect);
   5413          *argp[2] = MC_(blocks_dubious);
   5414          *argp[3] = MC_(blocks_reachable);
   5415          *argp[4] = MC_(blocks_suppressed);
   5416          // there is no argp[5]
   5417          //*argp[5] = MC_(blocks_indirect);
   5418          // XXX need to make *argp[1-4] defined;  currently done in the
   5419          // VALGRIND_COUNT_LEAK_BLOCKS_MACRO by initialising them to zero.
   5420          *ret = 0;
   5421          return True;
   5422       }
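               /* Illustrative client-side use of the two counting requests
                  above (a sketch; the exact macro bodies live in memcheck.h):

                     unsigned long leaked, dubious, reachable, suppressed;
                     VALGRIND_DO_LEAK_CHECK;
                     VALGRIND_COUNT_LEAKS(leaked, dubious, reachable, suppressed);

                  As noted above, the macro pre-initialises the four outputs to
                  zero, so *argp[1-4] are defined by the time they reach this
                  code. */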
   5423       case VG_USERREQ__MALLOCLIKE_BLOCK: {
   5424          Addr p         = (Addr)arg[1];
   5425          SizeT sizeB    =       arg[2];
   5426          //UInt rzB       =       arg[3];    XXX: unused!
   5427          Bool is_zeroed = (Bool)arg[4];
   5428 
   5429          MC_(new_block) ( tid, p, sizeB, /*ignored*/0, is_zeroed,
   5430                           MC_AllocCustom, MC_(malloc_list) );
   5431          return True;
   5432       }
   5433       case VG_USERREQ__RESIZEINPLACE_BLOCK: {
   5434          Addr p         = (Addr)arg[1];
   5435          SizeT oldSizeB =       arg[2];
   5436          SizeT newSizeB =       arg[3];
   5437          UInt rzB       =       arg[4];
   5438 
   5439          MC_(handle_resizeInPlace) ( tid, p, oldSizeB, newSizeB, rzB );
   5440          return True;
   5441       }
   5442       case VG_USERREQ__FREELIKE_BLOCK: {
   5443          Addr p         = (Addr)arg[1];
   5444          UInt rzB       =       arg[2];
   5445 
   5446          MC_(handle_free) ( tid, p, rzB, MC_AllocCustom );
   5447          return True;
   5448       }
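               /* Illustrative annotation of a custom allocator with the
                  requests above (a sketch; the macros are in valgrind.h, and
                  note that this version ignores the MALLOCLIKE redzone
                  argument, per the comment above).  carve_from_arena and
                  return_to_arena are hypothetical helpers:

                     void* my_alloc ( size_t n ) {
                        void* p = carve_from_arena(n);
                        VALGRIND_MALLOCLIKE_BLOCK(p, n, 0, 0); // rzB=0, is_zeroed=0
                        return p;
                     }
                     void my_free ( void* p ) {
                        VALGRIND_FREELIKE_BLOCK(p, 0);         // rzB=0
                        return_to_arena(p);
                     }
               */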
   5449 
   5450       case _VG_USERREQ__MEMCHECK_RECORD_OVERLAP_ERROR: {
   5451          Char* s   = (Char*)arg[1];
   5452          Addr  dst = (Addr) arg[2];
   5453          Addr  src = (Addr) arg[3];
   5454          SizeT len = (SizeT)arg[4];
   5455          MC_(record_overlap_error)(tid, s, src, dst, len);
   5456          return True;
   5457       }
   5458 
   5459       case VG_USERREQ__CREATE_MEMPOOL: {
   5460          Addr pool      = (Addr)arg[1];
   5461          UInt rzB       =       arg[2];
   5462          Bool is_zeroed = (Bool)arg[3];
   5463 
   5464          MC_(create_mempool) ( pool, rzB, is_zeroed );
   5465          return True;
   5466       }
   5467 
   5468       case VG_USERREQ__DESTROY_MEMPOOL: {
   5469          Addr pool      = (Addr)arg[1];
   5470 
   5471          MC_(destroy_mempool) ( pool );
   5472          return True;
   5473       }
   5474 
   5475       case VG_USERREQ__MEMPOOL_ALLOC: {
   5476          Addr pool      = (Addr)arg[1];
   5477          Addr addr      = (Addr)arg[2];
   5478          UInt size      =       arg[3];
   5479 
   5480          MC_(mempool_alloc) ( tid, pool, addr, size );
   5481          return True;
   5482       }
   5483 
   5484       case VG_USERREQ__MEMPOOL_FREE: {
   5485          Addr pool      = (Addr)arg[1];
   5486          Addr addr      = (Addr)arg[2];
   5487 
   5488          MC_(mempool_free) ( pool, addr );
   5489          return True;
   5490       }
   5491 
   5492       case VG_USERREQ__MEMPOOL_TRIM: {
   5493          Addr pool      = (Addr)arg[1];
   5494          Addr addr      = (Addr)arg[2];
   5495          UInt size      =       arg[3];
   5496 
   5497          MC_(mempool_trim) ( pool, addr, size );
   5498          return True;
   5499       }
   5500 
   5501       case VG_USERREQ__MOVE_MEMPOOL: {
   5502          Addr poolA     = (Addr)arg[1];
   5503          Addr poolB     = (Addr)arg[2];
   5504 
   5505          MC_(move_mempool) ( poolA, poolB );
   5506          return True;
   5507       }
   5508 
   5509       case VG_USERREQ__MEMPOOL_CHANGE: {
   5510          Addr pool      = (Addr)arg[1];
   5511          Addr addrA     = (Addr)arg[2];
   5512          Addr addrB     = (Addr)arg[3];
   5513          UInt size      =       arg[4];
   5514 
   5515          MC_(mempool_change) ( pool, addrA, addrB, size );
   5516          return True;
   5517       }
   5518 
   5519       case VG_USERREQ__MEMPOOL_EXISTS: {
   5520          Addr pool      = (Addr)arg[1];
   5521 
   5522          *ret = (UWord) MC_(mempool_exists) ( pool );
    5523          return True;
   5524       }
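               /* Illustrative mempool annotation tying the requests above
                  together (a sketch; macros are in valgrind.h; 'pool' is
                  simply an anchor address identifying the pool):

                     VALGRIND_CREATE_MEMPOOL(pool, 0, 0);   // rzB=0, is_zeroed=0
                     VALGRIND_MEMPOOL_ALLOC(pool, obj, objSzB);
                     ...
                     VALGRIND_MEMPOOL_FREE(pool, obj);
                     VALGRIND_DESTROY_MEMPOOL(pool);
               */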
   5525 
   5526       case VG_USERREQ__GDB_MONITOR_COMMAND: {
   5527          Bool handled = handle_gdb_monitor_command (tid, (Char*)arg[1]);
   5528          if (handled)
   5529             *ret = 1;
   5530          else
   5531             *ret = 0;
   5532          return handled;
   5533       }
   5534 
   5535       default:
   5536          VG_(message)(
   5537             Vg_UserMsg,
   5538             "Warning: unknown memcheck client request code %llx\n",
   5539             (ULong)arg[0]
   5540          );
   5541          return False;
   5542    }
   5543    return True;
   5544 }
   5545 
   5546 
   5547 /*------------------------------------------------------------*/
   5548 /*--- Crude profiling machinery.                           ---*/
   5549 /*------------------------------------------------------------*/
   5550 
   5551 // We track a number of interesting events (using PROF_EVENT)
   5552 // if MC_PROFILE_MEMORY is defined.
   5553 
   5554 #ifdef MC_PROFILE_MEMORY
   5555 
   5556 UInt   MC_(event_ctr)[N_PROF_EVENTS];
   5557 HChar* MC_(event_ctr_name)[N_PROF_EVENTS];
   5558 
   5559 static void init_prof_mem ( void )
   5560 {
   5561    Int i;
   5562    for (i = 0; i < N_PROF_EVENTS; i++) {
   5563       MC_(event_ctr)[i] = 0;
   5564       MC_(event_ctr_name)[i] = NULL;
   5565    }
   5566 }
   5567 
   5568 static void done_prof_mem ( void )
   5569 {
   5570    Int  i;
   5571    Bool spaced = False;
   5572    for (i = 0; i < N_PROF_EVENTS; i++) {
   5573       if (!spaced && (i % 10) == 0) {
   5574          VG_(printf)("\n");
   5575          spaced = True;
   5576       }
   5577       if (MC_(event_ctr)[i] > 0) {
   5578          spaced = False;
   5579          VG_(printf)( "prof mem event %3d: %9d   %s\n",
   5580                       i, MC_(event_ctr)[i],
   5581                       MC_(event_ctr_name)[i]
   5582                          ? MC_(event_ctr_name)[i] : "unnamed");
   5583       }
   5584    }
   5585 }
   5586 
   5587 #else
   5588 
   5589 static void init_prof_mem ( void ) { }
   5590 static void done_prof_mem ( void ) { }
   5591 
   5592 #endif
   5593 
   5594 
   5595 /*------------------------------------------------------------*/
   5596 /*--- Origin tracking stuff                                ---*/
   5597 /*------------------------------------------------------------*/
   5598 
   5599 /*--------------------------------------------*/
   5600 /*--- Origin tracking: load handlers       ---*/
   5601 /*--------------------------------------------*/
   5602 
   5603 static INLINE UInt merge_origins ( UInt or1, UInt or2 ) {
   5604    return or1 > or2 ? or1 : or2;
   5605 }
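         /* Worked example: an otag of zero means "no interesting origin", so
            merging a known origin with zero keeps the known one:
               merge_origins(0, otag) == otag
            When both halves carry an origin, keeping the larger value is an
            arbitrary but deterministic tie-break between the two
            (presumably; that is simply what max() gives here). */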
   5606 
   5607 UWord VG_REGPARM(1) MC_(helperc_b_load1)( Addr a ) {
   5608    OCacheLine* line;
   5609    UChar descr;
   5610    UWord lineoff = oc_line_offset(a);
   5611    UWord byteoff = a & 3; /* 0, 1, 2 or 3 */
   5612 
   5613    if (OC_ENABLE_ASSERTIONS) {
   5614       tl_assert(lineoff >= 0 && lineoff < OC_W32S_PER_LINE);
   5615    }
   5616 
   5617    line = find_OCacheLine( a );
   5618 
   5619    descr = line->descr[lineoff];
   5620    if (OC_ENABLE_ASSERTIONS) {
   5621       tl_assert(descr < 0x10);
   5622    }
   5623 
   5624    if (LIKELY(0 == (descr & (1 << byteoff))))  {
   5625       return 0;
   5626    } else {
   5627       return line->w32[lineoff];
   5628    }
   5629 }
   5630 
   5631 UWord VG_REGPARM(1) MC_(helperc_b_load2)( Addr a ) {
   5632    OCacheLine* line;
   5633    UChar descr;
   5634    UWord lineoff, byteoff;
   5635 
   5636    if (UNLIKELY(a & 1)) {
   5637       /* Handle misaligned case, slowly. */
   5638       UInt oLo   = (UInt)MC_(helperc_b_load1)( a + 0 );
   5639       UInt oHi   = (UInt)MC_(helperc_b_load1)( a + 1 );
   5640       return merge_origins(oLo, oHi);
   5641    }
   5642 
   5643    lineoff = oc_line_offset(a);
   5644    byteoff = a & 3; /* 0 or 2 */
   5645 
   5646    if (OC_ENABLE_ASSERTIONS) {
   5647       tl_assert(lineoff >= 0 && lineoff < OC_W32S_PER_LINE);
   5648    }
   5649    line = find_OCacheLine( a );
   5650 
   5651    descr = line->descr[lineoff];
   5652    if (OC_ENABLE_ASSERTIONS) {
   5653       tl_assert(descr < 0x10);
   5654    }
   5655 
   5656    if (LIKELY(0 == (descr & (3 << byteoff)))) {
   5657       return 0;
   5658    } else {
   5659       return line->w32[lineoff];
   5660    }
   5661 }
   5662 
   5663 UWord VG_REGPARM(1) MC_(helperc_b_load4)( Addr a ) {
   5664    OCacheLine* line;
   5665    UChar descr;
   5666    UWord lineoff;
   5667 
   5668    if (UNLIKELY(a & 3)) {
   5669       /* Handle misaligned case, slowly. */
   5670       UInt oLo   = (UInt)MC_(helperc_b_load2)( a + 0 );
   5671       UInt oHi   = (UInt)MC_(helperc_b_load2)( a + 2 );
   5672       return merge_origins(oLo, oHi);
   5673    }
   5674 
   5675    lineoff = oc_line_offset(a);
   5676    if (OC_ENABLE_ASSERTIONS) {
   5677       tl_assert(lineoff >= 0 && lineoff < OC_W32S_PER_LINE);
   5678    }
   5679 
   5680    line = find_OCacheLine( a );
   5681 
   5682    descr = line->descr[lineoff];
   5683    if (OC_ENABLE_ASSERTIONS) {
   5684       tl_assert(descr < 0x10);
   5685    }
   5686 
   5687    if (LIKELY(0 == descr)) {
   5688       return 0;
   5689    } else {
   5690       return line->w32[lineoff];
   5691    }
   5692 }
   5693 
   5694 UWord VG_REGPARM(1) MC_(helperc_b_load8)( Addr a ) {
   5695    OCacheLine* line;
   5696    UChar descrLo, descrHi, descr;
   5697    UWord lineoff;
   5698 
   5699    if (UNLIKELY(a & 7)) {
   5700       /* Handle misaligned case, slowly. */
   5701       UInt oLo   = (UInt)MC_(helperc_b_load4)( a + 0 );
   5702       UInt oHi   = (UInt)MC_(helperc_b_load4)( a + 4 );
   5703       return merge_origins(oLo, oHi);
   5704    }
   5705 
   5706    lineoff = oc_line_offset(a);
   5707    if (OC_ENABLE_ASSERTIONS) {
   5708       tl_assert(lineoff == (lineoff & 6)); /*0,2,4,6*//*since 8-aligned*/
   5709    }
   5710 
   5711    line = find_OCacheLine( a );
   5712 
   5713    descrLo = line->descr[lineoff + 0];
   5714    descrHi = line->descr[lineoff + 1];
   5715    descr   = descrLo | descrHi;
   5716    if (OC_ENABLE_ASSERTIONS) {
   5717       tl_assert(descr < 0x10);
   5718    }
   5719 
   5720    if (LIKELY(0 == descr)) {
   5721       return 0; /* both 32-bit chunks are defined */
   5722    } else {
   5723       UInt oLo = descrLo == 0 ? 0 : line->w32[lineoff + 0];
   5724       UInt oHi = descrHi == 0 ? 0 : line->w32[lineoff + 1];
   5725       return merge_origins(oLo, oHi);
   5726    }
   5727 }
   5728 
   5729 UWord VG_REGPARM(1) MC_(helperc_b_load16)( Addr a ) {
   5730    UInt oLo   = (UInt)MC_(helperc_b_load8)( a + 0 );
   5731    UInt oHi   = (UInt)MC_(helperc_b_load8)( a + 8 );
   5732    UInt oBoth = merge_origins(oLo, oHi);
   5733    return (UWord)oBoth;
   5734 }
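         /* Worked example of the misalignment handling above: a 4-byte load
            at 0x1002 splits into two aligned 2-byte loads at 0x1002 and
            0x1004, whose origins are merged.  In the worst case an 8-byte
            load at 0x1003 decomposes into eight 1-byte probes
            (0x1003..0x100A) whose results are merged pairwise back up the
            call chain. */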
   5735 
   5736 
   5737 /*--------------------------------------------*/
   5738 /*--- Origin tracking: store handlers      ---*/
   5739 /*--------------------------------------------*/
   5740 
   5741 void VG_REGPARM(2) MC_(helperc_b_store1)( Addr a, UWord d32 ) {
   5742    OCacheLine* line;
   5743    UWord lineoff = oc_line_offset(a);
   5744    UWord byteoff = a & 3; /* 0, 1, 2 or 3 */
   5745 
   5746    if (OC_ENABLE_ASSERTIONS) {
   5747       tl_assert(lineoff >= 0 && lineoff < OC_W32S_PER_LINE);
   5748    }
   5749 
   5750    line = find_OCacheLine( a );
   5751 
   5752    if (d32 == 0) {
   5753       line->descr[lineoff] &= ~(1 << byteoff);
   5754    } else {
   5755       line->descr[lineoff] |= (1 << byteoff);
   5756       line->w32[lineoff] = d32;
   5757    }
   5758 }
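         /* Worked example (assuming 'a' is 4-aligned and descr starts at 0):
            descr holds one bit per byte of the 32-bit group, but w32 holds a
            single origin for the whole group, so the most recent non-zero
            store wins:

               helperc_b_store1(a+2, otagA);  // descr 0b0100, w32 = otagA
               helperc_b_store1(a+0, otagB);  // descr 0b0101, w32 = otagB
               helperc_b_store1(a+2, 0);      // descr 0b0001, w32 unchanged

            Between the second and third calls a load of byte a+2 reports
            otagB rather than otagA, since the group keeps only one origin. */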
   5759 
   5760 void VG_REGPARM(2) MC_(helperc_b_store2)( Addr a, UWord d32 ) {
   5761    OCacheLine* line;
   5762    UWord lineoff, byteoff;
   5763 
   5764    if (UNLIKELY(a & 1)) {
   5765       /* Handle misaligned case, slowly. */
   5766       MC_(helperc_b_store1)( a + 0, d32 );
   5767       MC_(helperc_b_store1)( a + 1, d32 );
   5768       return;
   5769    }
   5770 
   5771    lineoff = oc_line_offset(a);
   5772    byteoff = a & 3; /* 0 or 2 */
   5773 
   5774    if (OC_ENABLE_ASSERTIONS) {
   5775       tl_assert(lineoff >= 0 && lineoff < OC_W32S_PER_LINE);
   5776    }
   5777 
   5778    line = find_OCacheLine( a );
   5779 
   5780    if (d32 == 0) {
   5781       line->descr[lineoff] &= ~(3 << byteoff);
   5782    } else {
   5783       line->descr[lineoff] |= (3 << byteoff);
   5784       line->w32[lineoff] = d32;
   5785    }
   5786 }
   5787 
   5788 void VG_REGPARM(2) MC_(helperc_b_store4)( Addr a, UWord d32 ) {
   5789    OCacheLine* line;
   5790    UWord lineoff;
   5791 
   5792    if (UNLIKELY(a & 3)) {
   5793       /* Handle misaligned case, slowly. */
   5794       MC_(helperc_b_store2)( a + 0, d32 );
   5795       MC_(helperc_b_store2)( a + 2, d32 );
   5796       return;
   5797    }
   5798 
   5799    lineoff = oc_line_offset(a);
   5800    if (OC_ENABLE_ASSERTIONS) {
   5801       tl_assert(lineoff >= 0 && lineoff < OC_W32S_PER_LINE);
   5802    }
   5803 
   5804    line = find_OCacheLine( a );
   5805 
   5806    if (d32 == 0) {
   5807       line->descr[lineoff] = 0;
   5808    } else {
   5809       line->descr[lineoff] = 0xF;
   5810       line->w32[lineoff] = d32;
   5811    }
   5812 }
   5813 
   5814 void VG_REGPARM(2) MC_(helperc_b_store8)( Addr a, UWord d32 ) {
   5815    OCacheLine* line;
   5816    UWord lineoff;
   5817 
   5818    if (UNLIKELY(a & 7)) {
   5819       /* Handle misaligned case, slowly. */
   5820       MC_(helperc_b_store4)( a + 0, d32 );
   5821       MC_(helperc_b_store4)( a + 4, d32 );
   5822       return;
   5823    }
   5824 
   5825    lineoff = oc_line_offset(a);
   5826    if (OC_ENABLE_ASSERTIONS) {
   5827       tl_assert(lineoff == (lineoff & 6)); /*0,2,4,6*//*since 8-aligned*/
   5828    }
   5829 
   5830    line = find_OCacheLine( a );
   5831 
   5832    if (d32 == 0) {
   5833       line->descr[lineoff + 0] = 0;
   5834       line->descr[lineoff + 1] = 0;
   5835    } else {
   5836       line->descr[lineoff + 0] = 0xF;
   5837       line->descr[lineoff + 1] = 0xF;
   5838       line->w32[lineoff + 0] = d32;
   5839       line->w32[lineoff + 1] = d32;
   5840    }
   5841 }
   5842 
   5843 void VG_REGPARM(2) MC_(helperc_b_store16)( Addr a, UWord d32 ) {
   5844    MC_(helperc_b_store8)( a + 0, d32 );
   5845    MC_(helperc_b_store8)( a + 8, d32 );
   5846 }
   5847 
   5848 
   5849 /*--------------------------------------------*/
   5850 /*--- Origin tracking: sarp handlers       ---*/
   5851 /*--------------------------------------------*/
   5852 
   5853 __attribute__((noinline))
   5854 static void ocache_sarp_Set_Origins ( Addr a, UWord len, UInt otag ) {
   5855    if ((a & 1) && len >= 1) {
   5856       MC_(helperc_b_store1)( a, otag );
   5857       a++;
   5858       len--;
   5859    }
   5860    if ((a & 2) && len >= 2) {
   5861       MC_(helperc_b_store2)( a, otag );
   5862       a += 2;
   5863       len -= 2;
   5864    }
   5865    if (len >= 4)
   5866       tl_assert(0 == (a & 3));
   5867    while (len >= 4) {
   5868       MC_(helperc_b_store4)( a, otag );
   5869       a += 4;
   5870       len -= 4;
   5871    }
   5872    if (len >= 2) {
   5873       MC_(helperc_b_store2)( a, otag );
   5874       a += 2;
   5875       len -= 2;
   5876    }
   5877    if (len >= 1) {
   5878       MC_(helperc_b_store1)( a, otag );
   5879       //a++;
   5880       len--;
   5881    }
   5882    tl_assert(len == 0);
   5883 }
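         /* Worked example of the alignment peeling above: for a = 0x1001,
            len = 10, the calls issued are
               store1(0x1001), store2(0x1002), store4(0x1004),
               store2(0x1008), store1(0x100A)
            i.e. head bytes are peeled until 'a' is 4-aligned, the bulk is
            done 4 bytes at a time, and the tail is finished with the 2- and
            1-byte handlers. */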
   5884 
   5885 __attribute__((noinline))
   5886 static void ocache_sarp_Clear_Origins ( Addr a, UWord len ) {
   5887    if ((a & 1) && len >= 1) {
   5888       MC_(helperc_b_store1)( a, 0 );
   5889       a++;
   5890       len--;
   5891    }
   5892    if ((a & 2) && len >= 2) {
   5893       MC_(helperc_b_store2)( a, 0 );
   5894       a += 2;
   5895       len -= 2;
   5896    }
   5897    if (len >= 4)
   5898       tl_assert(0 == (a & 3));
   5899    while (len >= 4) {
   5900       MC_(helperc_b_store4)( a, 0 );
   5901       a += 4;
   5902       len -= 4;
   5903    }
   5904    if (len >= 2) {
   5905       MC_(helperc_b_store2)( a, 0 );
   5906       a += 2;
   5907       len -= 2;
   5908    }
   5909    if (len >= 1) {
   5910       MC_(helperc_b_store1)( a, 0 );
   5911       //a++;
   5912       len--;
   5913    }
   5914    tl_assert(len == 0);
   5915 }
   5916 
   5917 
   5918 /*------------------------------------------------------------*/
   5919 /*--- Setup and finalisation                               ---*/
   5920 /*------------------------------------------------------------*/
   5921 
   5922 static void mc_post_clo_init ( void )
   5923 {
   5924    // timurrrr: removed the check for VG_(clo_xml) here.
   5925    if (MC_(clo_freelist_big_blocks) >= MC_(clo_freelist_vol))
   5926       VG_(message)(Vg_UserMsg,
   5927                    "Warning: --freelist-big-blocks value %lld has no effect\n"
    5928                    "as it is >= the --freelist-vol value %lld\n",
   5929                    MC_(clo_freelist_big_blocks),
   5930                    MC_(clo_freelist_vol));
   5931 
   5932    tl_assert( MC_(clo_mc_level) >= 1 && MC_(clo_mc_level) <= 3 );
   5933 
   5934    if (MC_(clo_mc_level) == 3) {
   5935       /* We're doing origin tracking. */
   5936 #     ifdef PERF_FAST_STACK
   5937       VG_(track_new_mem_stack_4_w_ECU)   ( mc_new_mem_stack_4_w_ECU   );
   5938       VG_(track_new_mem_stack_8_w_ECU)   ( mc_new_mem_stack_8_w_ECU   );
   5939       VG_(track_new_mem_stack_12_w_ECU)  ( mc_new_mem_stack_12_w_ECU  );
   5940       VG_(track_new_mem_stack_16_w_ECU)  ( mc_new_mem_stack_16_w_ECU  );
   5941       VG_(track_new_mem_stack_32_w_ECU)  ( mc_new_mem_stack_32_w_ECU  );
   5942       VG_(track_new_mem_stack_112_w_ECU) ( mc_new_mem_stack_112_w_ECU );
   5943       VG_(track_new_mem_stack_128_w_ECU) ( mc_new_mem_stack_128_w_ECU );
   5944       VG_(track_new_mem_stack_144_w_ECU) ( mc_new_mem_stack_144_w_ECU );
   5945       VG_(track_new_mem_stack_160_w_ECU) ( mc_new_mem_stack_160_w_ECU );
   5946 #     endif
   5947       VG_(track_new_mem_stack_w_ECU)     ( mc_new_mem_stack_w_ECU     );
   5948    } else {
   5949       /* Not doing origin tracking */
   5950 #     ifdef PERF_FAST_STACK
   5951       VG_(track_new_mem_stack_4)   ( mc_new_mem_stack_4   );
   5952       VG_(track_new_mem_stack_8)   ( mc_new_mem_stack_8   );
   5953       VG_(track_new_mem_stack_12)  ( mc_new_mem_stack_12  );
   5954       VG_(track_new_mem_stack_16)  ( mc_new_mem_stack_16  );
   5955       VG_(track_new_mem_stack_32)  ( mc_new_mem_stack_32  );
   5956       VG_(track_new_mem_stack_112) ( mc_new_mem_stack_112 );
   5957       VG_(track_new_mem_stack_128) ( mc_new_mem_stack_128 );
   5958       VG_(track_new_mem_stack_144) ( mc_new_mem_stack_144 );
   5959       VG_(track_new_mem_stack_160) ( mc_new_mem_stack_160 );
   5960 #     endif
   5961       VG_(track_new_mem_stack)     ( mc_new_mem_stack     );
   5962    }
   5963 
   5964    /* This origin tracking cache is huge (~100M), so only initialise
   5965       if we need it. */
   5966    if (MC_(clo_mc_level) >= 3) {
   5967       init_OCache();
   5968       tl_assert(ocacheL1 != NULL);
   5969       tl_assert(ocacheL2 != NULL);
   5970    } else {
   5971       tl_assert(ocacheL1 == NULL);
   5972       tl_assert(ocacheL2 == NULL);
   5973    }
   5974 }
   5975 
   5976 static void print_SM_info(char* type, int n_SMs)
   5977 {
   5978    VG_(message)(Vg_DebugMsg,
   5979       " memcheck: SMs: %s = %d (%ldk, %ldM)\n",
   5980       type,
   5981       n_SMs,
   5982       n_SMs * sizeof(SecMap) / 1024UL,
   5983       n_SMs * sizeof(SecMap) / (1024 * 1024UL) );
   5984 }
   5985 
   5986 static void mc_fini ( Int exitcode )
   5987 {
   5988    MC_(print_malloc_stats)();
   5989 
   5990    if (MC_(clo_leak_check) != LC_Off) {
   5991       LeakCheckParams lcp;
   5992       lcp.mode = MC_(clo_leak_check);
   5993       lcp.show_reachable = MC_(clo_show_reachable);
   5994       lcp.show_possibly_lost = MC_(clo_show_possibly_lost);
   5995       lcp.deltamode = LCD_Any;
   5996       lcp.requested_by_monitor_command = False;
   5997       MC_(detect_memory_leaks)(1/*bogus ThreadId*/, lcp);
   5998    } else {
   5999       if (VG_(clo_verbosity) == 1 && !VG_(clo_xml)) {
   6000          VG_(umsg)(
   6001             "For a detailed leak analysis, rerun with: --leak-check=full\n"
   6002             "\n"
   6003          );
   6004       }
   6005    }
   6006 
   6007    if (VG_(clo_verbosity) == 1 && !VG_(clo_xml)) {
   6008       VG_(message)(Vg_UserMsg,
   6009                    "For counts of detected and suppressed errors, rerun with: -v\n");
   6010    }
   6011 
   6012    if (MC_(any_value_errors) && !VG_(clo_xml) && VG_(clo_verbosity) >= 1
   6013        && MC_(clo_mc_level) == 2) {
   6014       VG_(message)(Vg_UserMsg,
   6015                    "Use --track-origins=yes to see where "
   6016                    "uninitialised values come from\n");
   6017    }
   6018 
   6019    done_prof_mem();
   6020 
   6021    if (VG_(clo_stats)) {
   6022       SizeT max_secVBit_szB, max_SMs_szB, max_shmem_szB;
   6023 
   6024       VG_(message)(Vg_DebugMsg,
   6025          " memcheck: sanity checks: %d cheap, %d expensive\n",
   6026          n_sanity_cheap, n_sanity_expensive );
   6027       VG_(message)(Vg_DebugMsg,
   6028          " memcheck: auxmaps: %lld auxmap entries (%lldk, %lldM) in use\n",
   6029          n_auxmap_L2_nodes,
   6030          n_auxmap_L2_nodes * 64,
   6031          n_auxmap_L2_nodes / 16 );
   6032       VG_(message)(Vg_DebugMsg,
   6033          " memcheck: auxmaps_L1: %lld searches, %lld cmps, ratio %lld:10\n",
   6034          n_auxmap_L1_searches, n_auxmap_L1_cmps,
   6035          (10ULL * n_auxmap_L1_cmps)
   6036             / (n_auxmap_L1_searches ? n_auxmap_L1_searches : 1)
   6037       );
   6038       VG_(message)(Vg_DebugMsg,
   6039          " memcheck: auxmaps_L2: %lld searches, %lld nodes\n",
   6040          n_auxmap_L2_searches, n_auxmap_L2_nodes
   6041       );
   6042 
   6043       print_SM_info("n_issued     ", n_issued_SMs);
   6044       print_SM_info("n_deissued   ", n_deissued_SMs);
   6045       print_SM_info("max_noaccess ", max_noaccess_SMs);
   6046       print_SM_info("max_undefined", max_undefined_SMs);
   6047       print_SM_info("max_defined  ", max_defined_SMs);
   6048       print_SM_info("max_non_DSM  ", max_non_DSM_SMs);
   6049 
   6050       // Three DSMs, plus the non-DSM ones
   6051       max_SMs_szB = (3 + max_non_DSM_SMs) * sizeof(SecMap);
   6052       // The 3*sizeof(Word) bytes is the AVL node metadata size.
   6053       // The 4*sizeof(Word) bytes is the malloc metadata size.
   6054       // Hardwiring these sizes in sucks, but I don't see how else to do it.
   6055       max_secVBit_szB = max_secVBit_nodes *
   6056             (sizeof(SecVBitNode) + 3*sizeof(Word) + 4*sizeof(Word));
   6057       max_shmem_szB   = sizeof(primary_map) + max_SMs_szB + max_secVBit_szB;
   6058 
   6059       VG_(message)(Vg_DebugMsg,
   6060          " memcheck: max sec V bit nodes:    %d (%ldk, %ldM)\n",
   6061          max_secVBit_nodes, max_secVBit_szB / 1024,
   6062                             max_secVBit_szB / (1024 * 1024));
   6063       VG_(message)(Vg_DebugMsg,
   6064          " memcheck: set_sec_vbits8 calls: %llu (new: %llu, updates: %llu)\n",
   6065          sec_vbits_new_nodes + sec_vbits_updates,
   6066          sec_vbits_new_nodes, sec_vbits_updates );
   6067       VG_(message)(Vg_DebugMsg,
   6068          " memcheck: max shadow mem size:   %ldk, %ldM\n",
   6069          max_shmem_szB / 1024, max_shmem_szB / (1024 * 1024));
   6070 
   6071       if (MC_(clo_mc_level) >= 3) {
   6072          VG_(message)(Vg_DebugMsg,
   6073                       " ocacheL1: %'12lu refs   %'12lu misses (%'lu lossage)\n",
   6074                       stats_ocacheL1_find,
   6075                       stats_ocacheL1_misses,
   6076                       stats_ocacheL1_lossage );
   6077          VG_(message)(Vg_DebugMsg,
   6078                       " ocacheL1: %'12lu at 0   %'12lu at 1\n",
   6079                       stats_ocacheL1_find - stats_ocacheL1_misses
   6080                          - stats_ocacheL1_found_at_1
   6081                          - stats_ocacheL1_found_at_N,
   6082                       stats_ocacheL1_found_at_1 );
   6083          VG_(message)(Vg_DebugMsg,
   6084                       " ocacheL1: %'12lu at 2+  %'12lu move-fwds\n",
   6085                       stats_ocacheL1_found_at_N,
   6086                       stats_ocacheL1_movefwds );
   6087          VG_(message)(Vg_DebugMsg,
   6088                       " ocacheL1: %'12lu sizeB  %'12u useful\n",
   6089                       (UWord)sizeof(OCache),
   6090                       4 * OC_W32S_PER_LINE * OC_LINES_PER_SET * OC_N_SETS );
   6091          VG_(message)(Vg_DebugMsg,
   6092                       " ocacheL2: %'12lu refs   %'12lu misses\n",
   6093                       stats__ocacheL2_refs,
   6094                       stats__ocacheL2_misses );
   6095          VG_(message)(Vg_DebugMsg,
   6096                       " ocacheL2:    %'9lu max nodes %'9lu curr nodes\n",
   6097                       stats__ocacheL2_n_nodes_max,
   6098                       stats__ocacheL2_n_nodes );
   6099          VG_(message)(Vg_DebugMsg,
   6100                       " niacache: %'12lu refs   %'12lu misses\n",
   6101                       stats__nia_cache_queries, stats__nia_cache_misses);
   6102       } else {
   6103          tl_assert(ocacheL1 == NULL);
   6104          tl_assert(ocacheL2 == NULL);
   6105       }
   6106    }
   6107 
   6108    if (0) {
   6109       VG_(message)(Vg_DebugMsg,
   6110         "------ Valgrind's client block stats follow ---------------\n" );
   6111       show_client_block_stats();
   6112    }
   6113 }
   6114 
    6115 /* Mark the given addr/len unaddressable for the watchpoint implementation.
    6116    The PointKind will be handled at access time. */
   6117 static Bool mc_mark_unaddressable_for_watchpoint (PointKind kind, Bool insert,
   6118                                                   Addr addr, SizeT len)
   6119 {
   6120    /* GDBTD this is somewhat fishy. We might rather have to save the previous
   6121       accessibility and definedness in gdbserver so as to allow restoring it
   6122       properly. Currently, we assume that the user only watches things
   6123       which are properly addressable and defined */
   6124    if (insert)
   6125       MC_(make_mem_noaccess) (addr, len);
   6126    else
   6127       MC_(make_mem_defined)  (addr, len);
   6128    return True;
   6129 }
   6130 
   6131 static void mc_pre_clo_init(void)
   6132 {
   6133    VG_(details_name)            ("Memcheck");
   6134    VG_(details_version)         (NULL);
   6135    VG_(details_description)     ("a memory error detector");
   6136    VG_(details_copyright_author)(
   6137       "Copyright (C) 2002-2011, and GNU GPL'd, by Julian Seward et al.");
   6138    VG_(details_bug_reports_to)  (VG_BUGS_TO);
   6139    VG_(details_avg_translation_sizeB) ( 640 );
   6140 
   6141    VG_(basic_tool_funcs)          (mc_post_clo_init,
   6142                                    MC_(instrument),
   6143                                    mc_fini);
   6144 
   6145    VG_(needs_final_IR_tidy_pass)  ( MC_(final_tidy) );
   6146 
   6147 
   6148    VG_(needs_core_errors)         ();
   6149    VG_(needs_tool_errors)         (MC_(eq_Error),
   6150                                    MC_(before_pp_Error),
   6151                                    MC_(pp_Error),
   6152                                    True,/*show TIDs for errors*/
   6153                                    MC_(update_Error_extra),
   6154                                    MC_(is_recognised_suppression),
   6155                                    MC_(read_extra_suppression_info),
   6156                                    MC_(error_matches_suppression),
   6157                                    MC_(get_error_name),
   6158                                    MC_(get_extra_suppression_info));
   6159    VG_(needs_libc_freeres)        ();
   6160    VG_(needs_command_line_options)(mc_process_cmd_line_options,
   6161                                    mc_print_usage,
   6162                                    mc_print_debug_usage);
   6163    VG_(needs_client_requests)     (mc_handle_client_request);
   6164    VG_(needs_sanity_checks)       (mc_cheap_sanity_check,
   6165                                    mc_expensive_sanity_check);
   6166    VG_(needs_malloc_replacement)  (MC_(malloc),
   6167                                    MC_(__builtin_new),
   6168                                    MC_(__builtin_vec_new),
   6169                                    MC_(memalign),
   6170                                    MC_(calloc),
   6171                                    MC_(free),
   6172                                    MC_(__builtin_delete),
   6173                                    MC_(__builtin_vec_delete),
   6174                                    MC_(realloc),
   6175                                    MC_(malloc_usable_size),
   6176                                    MC_MALLOC_REDZONE_SZB );
   6177 
   6178    VG_(needs_xml_output)          ();
   6179 
   6180    VG_(track_new_mem_startup)     ( mc_new_mem_startup );
   6181    VG_(track_new_mem_stack_signal)( make_mem_undefined_w_tid );
   6182    // We assume that brk()/sbrk() does not initialise new memory.  Is this
   6183    // accurate?  John Reiser says:
   6184    //
   6185    //   0) sbrk() can *decrease* process address space.  No zero fill is done
   6186    //   for a decrease, not even the fragment on the high end of the last page
   6187    //   that is beyond the new highest address.  For maximum safety and
   6188    //   portability, then the bytes in the last page that reside above [the
   6189    //   new] sbrk(0) should be considered to be uninitialized, but in practice
   6190    //   it is exceedingly likely that they will retain their previous
   6191    //   contents.
   6192    //
   6193    //   1) If an increase is large enough to require new whole pages, then
   6194    //   those new whole pages (like all new pages) are zero-filled by the
   6195    //   operating system.  So if sbrk(0) already is page aligned, then
   6196    //   sbrk(PAGE_SIZE) *does* zero-fill the new memory.
   6197    //
   6198    //   2) Any increase that lies within an existing allocated page is not
   6199    //   changed.  So if (x = sbrk(0)) is not page aligned, then
   6200    //   sbrk(PAGE_SIZE) yields ((PAGE_SIZE -1) & -x) bytes which keep their
   6201    //   existing contents, and an additional PAGE_SIZE bytes which are zeroed.
   6202    //   ((PAGE_SIZE -1) & x) of them are "covered" by the sbrk(), and the rest
   6203    //   of them come along for the ride because the operating system deals
   6204    //   only in whole pages.  Again, for maximum safety and portability, then
   6205    //   anything that lives above [the new] sbrk(0) should be considered
   6206    //   uninitialized, but in practice will retain previous contents [zero in
   6207    //   this case.]"
   6208    //
   6209    // In short:
   6210    //
   6211    //   A key property of sbrk/brk is that new whole pages that are supplied
   6212    //   by the operating system *do* get initialized to zero.
   6213    //
   6214    // As for the portability of all this:
   6215    //
   6216    //   sbrk and brk are not POSIX.  However, any system that is a derivative
    6217    //   of *nix has sbrk and brk because there are too many programs (such as
   6218    //   the Bourne shell) which rely on the traditional memory map (.text,
   6219    //   .data+.bss, stack) and the existence of sbrk/brk.
   6220    //
   6221    // So we should arguably observe all this.  However:
   6222    // - The current inaccuracy has caused maybe one complaint in seven years(?)
   6223    // - Relying on the zeroed-ness of whole brk'd pages is pretty grotty... I
   6224    //   doubt most programmers know the above information.
   6225    // So I'm not terribly unhappy with marking it as undefined. --njn.
   6226    //
   6227    // [More:  I think most of what John said only applies to sbrk().  It seems
   6228    // that brk() always deals in whole pages.  And since this event deals
   6229    // directly with brk(), not with sbrk(), perhaps it would be reasonable to
   6230    // just mark all memory it allocates as defined.]
   6231    //
   6232    VG_(track_new_mem_brk)         ( make_mem_undefined_w_tid );
   6233 
   6234    // Handling of mmap and mprotect isn't simple (well, it is simple,
   6235    // but the justification isn't.)  See comments above, just prior to
   6236    // mc_new_mem_mmap.
   6237    VG_(track_new_mem_mmap)        ( mc_new_mem_mmap );
   6238    VG_(track_change_mem_mprotect) ( mc_new_mem_mprotect );
   6239 
   6240    VG_(track_copy_mem_remap)      ( MC_(copy_address_range_state) );
   6241 
   6242    VG_(track_die_mem_stack_signal)( MC_(make_mem_noaccess) );
   6243    VG_(track_die_mem_brk)         ( MC_(make_mem_noaccess) );
   6244    VG_(track_die_mem_munmap)      ( MC_(make_mem_noaccess) );
   6245 
   6246    /* Defer the specification of the new_mem_stack functions to the
   6247       post_clo_init function, since we need to first parse the command
   6248       line before deciding which set to use. */
   6249 
   6250 #  ifdef PERF_FAST_STACK
   6251    VG_(track_die_mem_stack_4)     ( mc_die_mem_stack_4   );
   6252    VG_(track_die_mem_stack_8)     ( mc_die_mem_stack_8   );
   6253    VG_(track_die_mem_stack_12)    ( mc_die_mem_stack_12  );
   6254    VG_(track_die_mem_stack_16)    ( mc_die_mem_stack_16  );
   6255    VG_(track_die_mem_stack_32)    ( mc_die_mem_stack_32  );
   6256    VG_(track_die_mem_stack_112)   ( mc_die_mem_stack_112 );
   6257    VG_(track_die_mem_stack_128)   ( mc_die_mem_stack_128 );
   6258    VG_(track_die_mem_stack_144)   ( mc_die_mem_stack_144 );
   6259    VG_(track_die_mem_stack_160)   ( mc_die_mem_stack_160 );
   6260 #  endif
   6261    VG_(track_die_mem_stack)       ( mc_die_mem_stack     );
   6262 
   6263    VG_(track_ban_mem_stack)       ( MC_(make_mem_noaccess) );
   6264 
   6265    VG_(track_pre_mem_read)        ( check_mem_is_defined );
   6266    VG_(track_pre_mem_read_asciiz) ( check_mem_is_defined_asciiz );
   6267    VG_(track_pre_mem_write)       ( check_mem_is_addressable );
   6268    VG_(track_post_mem_write)      ( mc_post_mem_write );
   6269 
   6270    if (MC_(clo_mc_level) >= 2)
   6271       VG_(track_pre_reg_read)     ( mc_pre_reg_read );
   6272 
   6273    VG_(track_post_reg_write)                  ( mc_post_reg_write );
   6274    VG_(track_post_reg_write_clientcall_return)( mc_post_reg_write_clientcall );
   6275 
   6276    VG_(needs_watchpoint)          ( mc_mark_unaddressable_for_watchpoint );
   6277 
   6278    init_shadow_memory();
   6279    MC_(malloc_list)  = VG_(HT_construct)( "MC_(malloc_list)" );
   6280    MC_(mempool_list) = VG_(HT_construct)( "MC_(mempool_list)" );
   6281    init_prof_mem();
   6282 
   6283    tl_assert( mc_expensive_sanity_check() );
   6284 
   6285    // {LOADV,STOREV}[8421] will all fail horribly if this isn't true.
   6286    tl_assert(sizeof(UWord) == sizeof(Addr));
   6287    // Call me paranoid.  I don't care.
   6288    tl_assert(sizeof(void*) == sizeof(Addr));
   6289 
   6290    // BYTES_PER_SEC_VBIT_NODE must be a power of two.
   6291    tl_assert(-1 != VG_(log2)(BYTES_PER_SEC_VBIT_NODE));
   6292 
   6293    /* This is small.  Always initialise it. */
   6294    init_nia_to_ecu_cache();
   6295 
   6296    /* We can't initialise ocacheL1/ocacheL2 yet, since we don't know
   6297       if we need to, since the command line args haven't been
   6298       processed yet.  Hence defer it to mc_post_clo_init. */
   6299    tl_assert(ocacheL1 == NULL);
   6300    tl_assert(ocacheL2 == NULL);
   6301 
   6302    /* Check some important stuff.  See extensive comments above
   6303       re UNALIGNED_OR_HIGH for background. */
   6304 #  if VG_WORDSIZE == 4
   6305    tl_assert(sizeof(void*) == 4);
   6306    tl_assert(sizeof(Addr)  == 4);
   6307    tl_assert(sizeof(UWord) == 4);
   6308    tl_assert(sizeof(Word)  == 4);
   6309    tl_assert(MAX_PRIMARY_ADDRESS == 0xFFFFFFFFUL);
   6310    tl_assert(MASK(1) == 0UL);
   6311    tl_assert(MASK(2) == 1UL);
   6312    tl_assert(MASK(4) == 3UL);
   6313    tl_assert(MASK(8) == 7UL);
   6314 #  else
   6315    tl_assert(VG_WORDSIZE == 8);
   6316    tl_assert(sizeof(void*) == 8);
   6317    tl_assert(sizeof(Addr)  == 8);
   6318    tl_assert(sizeof(UWord) == 8);
   6319    tl_assert(sizeof(Word)  == 8);
   6320    tl_assert(MAX_PRIMARY_ADDRESS == 0x3FFFFFFFFFULL);
   6321    tl_assert(MASK(1) == 0xFFFFFFC000000000ULL);
   6322    tl_assert(MASK(2) == 0xFFFFFFC000000001ULL);
   6323    tl_assert(MASK(4) == 0xFFFFFFC000000003ULL);
   6324    tl_assert(MASK(8) == 0xFFFFFFC000000007ULL);
   6325 #  endif
   6326 }
   6327 
   6328 VG_DETERMINE_INTERFACE_VERSION(mc_pre_clo_init)
   6329 
   6330 /*--------------------------------------------------------------------*/
   6331 /*--- end                                                mc_main.c ---*/
   6332 /*--------------------------------------------------------------------*/
   6333