      1 
      2 /*--------------------------------------------------------------------*/
      3 /*--- MemCheck: Maintain bitmaps of memory, tracking the           ---*/
      4 /*--- accessibility (A) and validity (V) status of each byte.      ---*/
      5 /*---                                                    mc_main.c ---*/
      6 /*--------------------------------------------------------------------*/
      7 
      8 /*
      9    This file is part of MemCheck, a heavyweight Valgrind tool for
     10    detecting memory errors.
     11 
     12    Copyright (C) 2000-2013 Julian Seward
     13       jseward (at) acm.org
     14 
     15    This program is free software; you can redistribute it and/or
     16    modify it under the terms of the GNU General Public License as
     17    published by the Free Software Foundation; either version 2 of the
     18    License, or (at your option) any later version.
     19 
     20    This program is distributed in the hope that it will be useful, but
     21    WITHOUT ANY WARRANTY; without even the implied warranty of
     22    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
     23    General Public License for more details.
     24 
     25    You should have received a copy of the GNU General Public License
     26    along with this program; if not, write to the Free Software
     27    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
     28    02111-1307, USA.
     29 
     30    The GNU General Public License is contained in the file COPYING.
     31 */
     32 
     33 #include "pub_tool_basics.h"
     34 #include "pub_tool_aspacemgr.h"
     35 #include "pub_tool_gdbserver.h"
     36 #include "pub_tool_poolalloc.h"
     37 #include "pub_tool_hashtable.h"     // For mc_include.h
     38 #include "pub_tool_libcbase.h"
     39 #include "pub_tool_libcassert.h"
     40 #include "pub_tool_libcprint.h"
     41 #include "pub_tool_machine.h"
     42 #include "pub_tool_mallocfree.h"
     43 #include "pub_tool_options.h"
     44 #include "pub_tool_oset.h"
     45 #include "pub_tool_rangemap.h"
     46 #include "pub_tool_replacemalloc.h"
     47 #include "pub_tool_tooliface.h"
     48 #include "pub_tool_threadstate.h"
     49 
     50 #include "mc_include.h"
     51 #include "memcheck.h"   /* for client requests */
     52 
     53 
     54 /* Set to 1 to do a little more sanity checking */
     55 #define VG_DEBUG_MEMORY 0
     56 
     57 #define DEBUG(fmt, args...) //VG_(printf)(fmt, ## args)
     58 
     59 static void ocache_sarp_Set_Origins ( Addr, UWord, UInt ); /* fwds */
     60 static void ocache_sarp_Clear_Origins ( Addr, UWord ); /* fwds */
     61 
     62 
     63 /*------------------------------------------------------------*/
     64 /*--- Fast-case knobs                                      ---*/
     65 /*------------------------------------------------------------*/
     66 
     67 // Comment these out to disable the fast cases (don't just set them to zero).
     68 
     69 #define PERF_FAST_LOADV    1
     70 #define PERF_FAST_STOREV   1
     71 
     72 #define PERF_FAST_SARP     1
     73 
     74 #define PERF_FAST_STACK    1
     75 #define PERF_FAST_STACK2   1
     76 
     77 /* Change this to 1 to enable assertions on origin tracking cache fast
     78    paths */
     79 #define OC_ENABLE_ASSERTIONS 0
     80 
     81 
     82 /*------------------------------------------------------------*/
     83 /*--- Comments on the origin tracking implementation       ---*/
     84 /*------------------------------------------------------------*/
     85 
     86 /* See detailed comment entitled
     87    AN OVERVIEW OF THE ORIGIN TRACKING IMPLEMENTATION
     88    which is contained further on in this file. */
     89 
     90 
     91 /*------------------------------------------------------------*/
     92 /*--- V bits and A bits                                    ---*/
     93 /*------------------------------------------------------------*/
     94 
     95 /* Conceptually, every byte value has 8 V bits, which track whether Memcheck
     96    thinks the corresponding value bit is defined.  And every memory byte
     97    has an A bit, which tracks whether Memcheck thinks the program can access
     98    it safely (ie. it's mapped, and has at least one of the RWX permission bits
     99    set).  So every N-bit register is shadowed with N V bits, and every memory
    100    byte is shadowed with 8 V bits and one A bit.
    101 
    102    In the implementation, we use two forms of compression (compressed V bits
    103    and distinguished secondary maps) to avoid the 9-bit-per-byte overhead
    104    for memory.
    105 
    106    Memcheck also tracks extra information about each heap block that is
    107    allocated, for detecting memory leaks and other purposes.
    108 */
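/* Illustrative example (a sketch, not part of this file): from a client
   program's point of view the two kinds of shadow state above show up
   as two different error classes.

      int* p = malloc(4 * sizeof(int));
      int  x;
      if (x > 0)        // V-bit error: 'x' is addressable but undefined
         p[0] = 1;
      free(p);
      int  y = p[1];    // A-bit error: 'p[1]' is no longer addressable

   The rest of this file is concerned with how those A and V bits are
   stored and queried efficiently. */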
    109 
    110 /*------------------------------------------------------------*/
    111 /*--- Basic A/V bitmap representation.                     ---*/
    112 /*------------------------------------------------------------*/
    113 
    114 /* All reads and writes are checked against a memory map (a.k.a. shadow
    115    memory), which records the state of all memory in the process.
    116 
    117    On 32-bit machines the memory map is organised as follows.
    118    The top 16 bits of an address are used to index into a top-level
    119    map table, containing 65536 entries.  Each entry is a pointer to a
     120    second-level map, which records the accessibility and validity
    121    permissions for the 65536 bytes indexed by the lower 16 bits of the
    122    address.  Each byte is represented by two bits (details are below).  So
    123    each second-level map contains 16384 bytes.  This two-level arrangement
     124    conveniently divides the 4G address space into 64k lumps, each 64k
     125    bytes in size.
    126 
    127    All entries in the primary (top-level) map must point to a valid
    128    secondary (second-level) map.  Since many of the 64kB chunks will
    129    have the same status for every bit -- ie. noaccess (for unused
    130    address space) or entirely addressable and defined (for code segments) --
    131    there are three distinguished secondary maps, which indicate 'noaccess',
    132    'undefined' and 'defined'.  For these uniform 64kB chunks, the primary
    133    map entry points to the relevant distinguished map.  In practice,
    134    typically more than half of the addressable memory is represented with
    135    the 'undefined' or 'defined' distinguished secondary map, so it gives a
    136    good saving.  It also lets us set the V+A bits of large address regions
    137    quickly in set_address_range_perms().
    138 
    139    On 64-bit machines it's more complicated.  If we followed the same basic
    140    scheme we'd have a four-level table which would require too many memory
    141    accesses.  So instead the top-level map table has 2^19 entries (indexed
     142    accesses.  So instead the top-level map table has 2^20 entries (indexed
     143    using bits 16..35 of the address);  this covers the bottom 64GB.  Any
     144    accesses above 64GB are handled with a slow, sparse auxiliary table.
     145    Valgrind's address space manager tries very hard to keep things below
     146    this 64GB barrier so that performance doesn't suffer too much.
    147    Note that this file has a lot of different functions for reading and
    148    writing shadow memory.  Only a couple are strictly necessary (eg.
     149    get_vabits2 and set_vabits2); most are just specialised for specific
    150    common cases to improve performance.
    151 
    152    Aside: the V+A bits are less precise than they could be -- we have no way
    153    of marking memory as read-only.  It would be great if we could add an
    154    extra state VA_BITSn_READONLY.  But then we'd have 5 different states,
    155    which requires 2.3 bits to hold, and there's no way to do that elegantly
    156    -- we'd have to double up to 4 bits of metadata per byte, which doesn't
    157    seem worth it.
    158 */
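/* Illustrative sketch (not a function in this file): for a 64-bit build,
   the shadow state for address a = 0x123456789 (below the 64GB fast-path
   limit) is reached roughly as follows, using the types and tables
   defined below:

      SecMap* sm      = primary_map[a >> 16];    // == primary_map[0x12345]
      UWord   sm_off  = (a & 0xFFFF) >> 2;       // == 0x19E2
      UChar   vabits8 = sm->vabits8[sm_off];     // V+A bits for 4 bytes
      // (a & 3) == 1, so this byte's 2 bits are bits [3..2] of vabits8

   Addresses at or above 64GB instead go through the auxiliary primary
   map described further down. */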
    159 
    160 /* --------------- Basic configuration --------------- */
    161 
    162 /* Only change this.  N_PRIMARY_MAP *must* be a power of 2. */
    163 
    164 #if VG_WORDSIZE == 4
    165 
    166 /* cover the entire address space */
    167 #  define N_PRIMARY_BITS  16
    168 
    169 #else
    170 
    171 /* Just handle the first 64G fast and the rest via auxiliary
    172    primaries.  If you change this, Memcheck will assert at startup.
    173    See the definition of UNALIGNED_OR_HIGH for extensive comments. */
    174 #  define N_PRIMARY_BITS  20
    175 
    176 #endif
    177 
    178 
    179 /* Do not change this. */
    180 #define N_PRIMARY_MAP  ( ((UWord)1) << N_PRIMARY_BITS)
    181 
    182 /* Do not change this. */
    183 #define MAX_PRIMARY_ADDRESS (Addr)((((Addr)65536) * N_PRIMARY_MAP)-1)
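/* Sketch of the resulting numbers (assuming the settings above):
   - 32-bit: N_PRIMARY_MAP = 2^16 and MAX_PRIMARY_ADDRESS = 2^32 - 1,
     so the whole address space is covered by primary_map[].
   - 64-bit: N_PRIMARY_MAP = 2^20 and MAX_PRIMARY_ADDRESS = 2^36 - 1
     (64GB - 1), so anything above 64GB falls through to the auxiliary
     primary map (auxmap_L1/auxmap_L2 below). */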
    184 
    185 
    186 /* --------------- Secondary maps --------------- */
    187 
    188 // Each byte of memory conceptually has an A bit, which indicates its
     189 // addressability, and 8 V bits, which indicate its definedness.
    190 //
    191 // But because very few bytes are partially defined, we can use a nice
    192 // compression scheme to reduce the size of shadow memory.  Each byte of
     193 // memory has 2 bits which indicate its state (ie. V+A bits):
    194 //
    195 //   00:  noaccess    (unaddressable but treated as fully defined)
    196 //   01:  undefined   (addressable and fully undefined)
    197 //   10:  defined     (addressable and fully defined)
    198 //   11:  partdefined (addressable and partially defined)
    199 //
    200 // In the "partdefined" case, we use a secondary table to store the V bits.
    201 // Each entry in the secondary-V-bits table maps a byte address to its 8 V
    202 // bits.
    203 //
    204 // We store the compressed V+A bits in 8-bit chunks, ie. the V+A bits for
    205 // four bytes (32 bits) of memory are in each chunk.  Hence the name
    206 // "vabits8".  This lets us get the V+A bits for four bytes at a time
    207 // easily (without having to do any shifting and/or masking), and that is a
    208 // very common operation.  (Note that although each vabits8 chunk
    209 // is 8 bits in size, it represents 32 bits of memory.)
    210 //
    211 // The representation is "inverse" little-endian... each 4 bytes of
    212 // memory is represented by a 1 byte value, where:
    213 //
    214 // - the status of byte (a+0) is held in bits [1..0]
    215 // - the status of byte (a+1) is held in bits [3..2]
    216 // - the status of byte (a+2) is held in bits [5..4]
    217 // - the status of byte (a+3) is held in bits [7..6]
    218 //
    219 // It's "inverse" because endianness normally describes a mapping from
    220 // value bits to memory addresses;  in this case the mapping is inverted.
    221 // Ie. instead of particular value bits being held in certain addresses, in
    222 // this case certain addresses are represented by particular value bits.
    223 // See insert_vabits2_into_vabits8() for an example.
    224 //
    225 // But note that we don't compress the V bits stored in registers;  they
     226 // need to be explicit to make the shadow operations possible.  Therefore
    227 // when moving values between registers and memory we need to convert
    228 // between the expanded in-register format and the compressed in-memory
     229 // format.  This isn't so difficult, but it requires careful attention in a
    230 // few places.
    231 
    232 // These represent eight bits of memory.
    233 #define VA_BITS2_NOACCESS     0x0      // 00b
    234 #define VA_BITS2_UNDEFINED    0x1      // 01b
    235 #define VA_BITS2_DEFINED      0x2      // 10b
    236 #define VA_BITS2_PARTDEFINED  0x3      // 11b
    237 
    238 // These represent 16 bits of memory.
    239 #define VA_BITS4_NOACCESS     0x0      // 00_00b
    240 #define VA_BITS4_UNDEFINED    0x5      // 01_01b
    241 #define VA_BITS4_DEFINED      0xa      // 10_10b
    242 
    243 // These represent 32 bits of memory.
    244 #define VA_BITS8_NOACCESS     0x00     // 00_00_00_00b
    245 #define VA_BITS8_UNDEFINED    0x55     // 01_01_01_01b
    246 #define VA_BITS8_DEFINED      0xaa     // 10_10_10_10b
    247 
    248 // These represent 64 bits of memory.
    249 #define VA_BITS16_NOACCESS    0x0000   // 00_00_00_00b x 2
    250 #define VA_BITS16_UNDEFINED   0x5555   // 01_01_01_01b x 2
    251 #define VA_BITS16_DEFINED     0xaaaa   // 10_10_10_10b x 2
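/* Illustrative sketch (not used by the code below): assembling one
   vabits8 chunk by hand for an aligned group of 4 bytes whose states
   are, in address order, defined / undefined / noaccess / defined. */
#if 0
static UChar example_vabits8 ( void )
{
   UChar b0 = VA_BITS2_DEFINED;     /* byte (a+0) -> bits [1..0] */
   UChar b1 = VA_BITS2_UNDEFINED;   /* byte (a+1) -> bits [3..2] */
   UChar b2 = VA_BITS2_NOACCESS;    /* byte (a+2) -> bits [5..4] */
   UChar b3 = VA_BITS2_DEFINED;     /* byte (a+3) -> bits [7..6] */
   return (UChar)((b0 << 0) | (b1 << 2) | (b2 << 4) | (b3 << 6)); /* 0x86 */
}
#endif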
    252 
    253 
    254 #define SM_CHUNKS             16384
    255 #define SM_OFF(aaa)           (((aaa) & 0xffff) >> 2)
    256 #define SM_OFF_16(aaa)        (((aaa) & 0xffff) >> 3)
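/* Sketch of how SM_OFF_16 is used by the aligned 64-bit fast paths
   further down: for an 8-aligned address the V+A bits for all 8 bytes
   sit in one 16-bit halfword of the secondary map, so one comparison
   classifies the whole doubleword. */
#if 0
   SecMap* sm       = get_secmap_for_reading(a);   /* defined below */
   UWord   sm_off16 = SM_OFF_16(a);
   UShort  vabits16 = ((UShort*)(sm->vabits8))[sm_off16];
   if (vabits16 == VA_BITS16_DEFINED) { /* all 8 bytes addressable+defined */ }
#endif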
    257 
    258 // Paranoia:  it's critical for performance that the requested inlining
    259 // occurs.  So try extra hard.
    260 #define INLINE    inline __attribute__((always_inline))
    261 
    262 static INLINE Addr start_of_this_sm ( Addr a ) {
    263    return (a & (~SM_MASK));
    264 }
    265 static INLINE Bool is_start_of_sm ( Addr a ) {
    266    return (start_of_this_sm(a) == a);
    267 }
    268 
    269 typedef
    270    struct {
    271       UChar vabits8[SM_CHUNKS];
    272    }
    273    SecMap;
    274 
    275 // 3 distinguished secondary maps, one for no-access, one for
    276 // accessible but undefined, and one for accessible and defined.
    277 // Distinguished secondaries may never be modified.
    278 #define SM_DIST_NOACCESS   0
    279 #define SM_DIST_UNDEFINED  1
    280 #define SM_DIST_DEFINED    2
    281 
    282 static SecMap sm_distinguished[3];
    283 
    284 static INLINE Bool is_distinguished_sm ( SecMap* sm ) {
    285    return sm >= &sm_distinguished[0] && sm <= &sm_distinguished[2];
    286 }
    287 
    288 // Forward declaration
    289 static void update_SM_counts(SecMap* oldSM, SecMap* newSM);
    290 
    291 /* dist_sm points to one of our three distinguished secondaries.  Make
    292    a copy of it so that we can write to it.
    293 */
    294 static SecMap* copy_for_writing ( SecMap* dist_sm )
    295 {
    296    SecMap* new_sm;
    297    tl_assert(dist_sm == &sm_distinguished[0]
    298           || dist_sm == &sm_distinguished[1]
    299           || dist_sm == &sm_distinguished[2]);
    300 
    301    new_sm = VG_(am_shadow_alloc)(sizeof(SecMap));
    302    if (new_sm == NULL)
    303       VG_(out_of_memory_NORETURN)( "memcheck:allocate new SecMap",
    304                                    sizeof(SecMap) );
    305    VG_(memcpy)(new_sm, dist_sm, sizeof(SecMap));
    306    update_SM_counts(dist_sm, new_sm);
    307    return new_sm;
    308 }
    309 
    310 /* --------------- Stats --------------- */
    311 
    312 static Int   n_issued_SMs      = 0;
    313 static Int   n_deissued_SMs    = 0;
    314 static Int   n_noaccess_SMs    = N_PRIMARY_MAP; // start with many noaccess DSMs
    315 static Int   n_undefined_SMs   = 0;
    316 static Int   n_defined_SMs     = 0;
    317 static Int   n_non_DSM_SMs     = 0;
    318 static Int   max_noaccess_SMs  = 0;
    319 static Int   max_undefined_SMs = 0;
    320 static Int   max_defined_SMs   = 0;
    321 static Int   max_non_DSM_SMs   = 0;
    322 
    323 /* # searches initiated in auxmap_L1, and # base cmps required */
    324 static ULong n_auxmap_L1_searches  = 0;
    325 static ULong n_auxmap_L1_cmps      = 0;
    326 /* # of searches that missed in auxmap_L1 and therefore had to
    327    be handed to auxmap_L2. And the number of nodes inserted. */
    328 static ULong n_auxmap_L2_searches  = 0;
    329 static ULong n_auxmap_L2_nodes     = 0;
    330 
    331 static Int   n_sanity_cheap     = 0;
    332 static Int   n_sanity_expensive = 0;
    333 
    334 static Int   n_secVBit_nodes   = 0;
    335 static Int   max_secVBit_nodes = 0;
    336 
    337 static void update_SM_counts(SecMap* oldSM, SecMap* newSM)
    338 {
    339    if      (oldSM == &sm_distinguished[SM_DIST_NOACCESS ]) n_noaccess_SMs --;
    340    else if (oldSM == &sm_distinguished[SM_DIST_UNDEFINED]) n_undefined_SMs--;
    341    else if (oldSM == &sm_distinguished[SM_DIST_DEFINED  ]) n_defined_SMs  --;
    342    else                                                  { n_non_DSM_SMs  --;
    343                                                            n_deissued_SMs ++; }
    344 
    345    if      (newSM == &sm_distinguished[SM_DIST_NOACCESS ]) n_noaccess_SMs ++;
    346    else if (newSM == &sm_distinguished[SM_DIST_UNDEFINED]) n_undefined_SMs++;
    347    else if (newSM == &sm_distinguished[SM_DIST_DEFINED  ]) n_defined_SMs  ++;
    348    else                                                  { n_non_DSM_SMs  ++;
    349                                                            n_issued_SMs   ++; }
    350 
    351    if (n_noaccess_SMs  > max_noaccess_SMs ) max_noaccess_SMs  = n_noaccess_SMs;
    352    if (n_undefined_SMs > max_undefined_SMs) max_undefined_SMs = n_undefined_SMs;
    353    if (n_defined_SMs   > max_defined_SMs  ) max_defined_SMs   = n_defined_SMs;
    354    if (n_non_DSM_SMs   > max_non_DSM_SMs  ) max_non_DSM_SMs   = n_non_DSM_SMs;
    355 }
    356 
    357 /* --------------- Primary maps --------------- */
    358 
    359 /* The main primary map.  This covers some initial part of the address
    360    space, addresses 0 .. (N_PRIMARY_MAP << 16)-1.  The rest of it is
    361    handled using the auxiliary primary map.
    362 */
    363 static SecMap* primary_map[N_PRIMARY_MAP];
    364 
    365 
    366 /* An entry in the auxiliary primary map.  base must be a 64k-aligned
    367    value, and sm points at the relevant secondary map.  As with the
    368    main primary map, the secondary may be either a real secondary, or
    369    one of the three distinguished secondaries.  DO NOT CHANGE THIS
    370    LAYOUT: the first word has to be the key for OSet fast lookups.
    371 */
    372 typedef
    373    struct {
    374       Addr    base;
    375       SecMap* sm;
    376    }
    377    AuxMapEnt;
    378 
    379 /* Tunable parameter: How big is the L1 queue? */
    380 #define N_AUXMAP_L1 24
    381 
    382 /* Tunable parameter: How far along the L1 queue to insert
    383    entries resulting from L2 lookups? */
    384 #define AUXMAP_L1_INSERT_IX 12
    385 
    386 static struct {
    387           Addr       base;
    388           AuxMapEnt* ent; // pointer to the matching auxmap_L2 node
    389        }
    390        auxmap_L1[N_AUXMAP_L1];
    391 
    392 static OSet* auxmap_L2 = NULL;
    393 
    394 static void init_auxmap_L1_L2 ( void )
    395 {
    396    Int i;
    397    for (i = 0; i < N_AUXMAP_L1; i++) {
    398       auxmap_L1[i].base = 0;
    399       auxmap_L1[i].ent  = NULL;
    400    }
    401 
    402    tl_assert(0 == offsetof(AuxMapEnt,base));
    403    tl_assert(sizeof(Addr) == sizeof(void*));
    404    auxmap_L2 = VG_(OSetGen_Create)( /*keyOff*/  offsetof(AuxMapEnt,base),
    405                                     /*fastCmp*/ NULL,
    406                                     VG_(malloc), "mc.iaLL.1", VG_(free) );
    407 }
    408 
    409 /* Check representation invariants; if OK return NULL; else a
    410    descriptive bit of text.  Also return the number of
    411    non-distinguished secondary maps referred to from the auxiliary
    412    primary maps. */
    413 
    414 static const HChar* check_auxmap_L1_L2_sanity ( Word* n_secmaps_found )
    415 {
    416    Word i, j;
    417    /* On a 32-bit platform, the L2 and L1 tables should
    418       both remain empty forever.
    419 
    420       On a 64-bit platform:
    421       In the L2 table:
    422        all .base & 0xFFFF == 0
    423        all .base > MAX_PRIMARY_ADDRESS
    424       In the L1 table:
    425        all .base & 0xFFFF == 0
    426        all (.base > MAX_PRIMARY_ADDRESS
    427             .base & 0xFFFF == 0
    428             and .ent points to an AuxMapEnt with the same .base)
    429            or
    430            (.base == 0 and .ent == NULL)
    431    */
    432    *n_secmaps_found = 0;
    433    if (sizeof(void*) == 4) {
    434       /* 32-bit platform */
    435       if (VG_(OSetGen_Size)(auxmap_L2) != 0)
    436          return "32-bit: auxmap_L2 is non-empty";
    437       for (i = 0; i < N_AUXMAP_L1; i++)
     438          if (auxmap_L1[i].base != 0 || auxmap_L1[i].ent != NULL)
     439             return "32-bit: auxmap_L1 is non-empty";
    440    } else {
    441       /* 64-bit platform */
    442       UWord elems_seen = 0;
    443       AuxMapEnt *elem, *res;
    444       AuxMapEnt key;
    445       /* L2 table */
    446       VG_(OSetGen_ResetIter)(auxmap_L2);
    447       while ( (elem = VG_(OSetGen_Next)(auxmap_L2)) ) {
    448          elems_seen++;
    449          if (0 != (elem->base & (Addr)0xFFFF))
    450             return "64-bit: nonzero .base & 0xFFFF in auxmap_L2";
    451          if (elem->base <= MAX_PRIMARY_ADDRESS)
    452             return "64-bit: .base <= MAX_PRIMARY_ADDRESS in auxmap_L2";
    453          if (elem->sm == NULL)
    454             return "64-bit: .sm in _L2 is NULL";
    455          if (!is_distinguished_sm(elem->sm))
    456             (*n_secmaps_found)++;
    457       }
    458       if (elems_seen != n_auxmap_L2_nodes)
    459          return "64-bit: disagreement on number of elems in _L2";
    460       /* Check L1-L2 correspondence */
    461       for (i = 0; i < N_AUXMAP_L1; i++) {
    462          if (auxmap_L1[i].base == 0 && auxmap_L1[i].ent == NULL)
    463             continue;
    464          if (0 != (auxmap_L1[i].base & (Addr)0xFFFF))
    465             return "64-bit: nonzero .base & 0xFFFF in auxmap_L1";
    466          if (auxmap_L1[i].base <= MAX_PRIMARY_ADDRESS)
    467             return "64-bit: .base <= MAX_PRIMARY_ADDRESS in auxmap_L1";
    468          if (auxmap_L1[i].ent == NULL)
    469             return "64-bit: .ent is NULL in auxmap_L1";
    470          if (auxmap_L1[i].ent->base != auxmap_L1[i].base)
    471             return "64-bit: _L1 and _L2 bases are inconsistent";
    472          /* Look it up in auxmap_L2. */
    473          key.base = auxmap_L1[i].base;
    474          key.sm   = 0;
    475          res = VG_(OSetGen_Lookup)(auxmap_L2, &key);
    476          if (res == NULL)
    477             return "64-bit: _L1 .base not found in _L2";
    478          if (res != auxmap_L1[i].ent)
    479             return "64-bit: _L1 .ent disagrees with _L2 entry";
    480       }
    481       /* Check L1 contains no duplicates */
    482       for (i = 0; i < N_AUXMAP_L1; i++) {
    483          if (auxmap_L1[i].base == 0)
    484             continue;
     485          for (j = i+1; j < N_AUXMAP_L1; j++) {
    486             if (auxmap_L1[j].base == 0)
    487                continue;
    488             if (auxmap_L1[j].base == auxmap_L1[i].base)
    489                return "64-bit: duplicate _L1 .base entries";
    490          }
    491       }
    492    }
    493    return NULL; /* ok */
    494 }
    495 
    496 static void insert_into_auxmap_L1_at ( Word rank, AuxMapEnt* ent )
    497 {
    498    Word i;
    499    tl_assert(ent);
    500    tl_assert(rank >= 0 && rank < N_AUXMAP_L1);
    501    for (i = N_AUXMAP_L1-1; i > rank; i--)
    502       auxmap_L1[i] = auxmap_L1[i-1];
    503    auxmap_L1[rank].base = ent->base;
    504    auxmap_L1[rank].ent  = ent;
    505 }
    506 
    507 static INLINE AuxMapEnt* maybe_find_in_auxmap ( Addr a )
    508 {
    509    AuxMapEnt  key;
    510    AuxMapEnt* res;
    511    Word       i;
    512 
    513    tl_assert(a > MAX_PRIMARY_ADDRESS);
    514    a &= ~(Addr)0xFFFF;
    515 
    516    /* First search the front-cache, which is a self-organising
    517       list containing the most popular entries. */
    518 
    519    if (LIKELY(auxmap_L1[0].base == a))
    520       return auxmap_L1[0].ent;
    521    if (LIKELY(auxmap_L1[1].base == a)) {
    522       Addr       t_base = auxmap_L1[0].base;
    523       AuxMapEnt* t_ent  = auxmap_L1[0].ent;
    524       auxmap_L1[0].base = auxmap_L1[1].base;
    525       auxmap_L1[0].ent  = auxmap_L1[1].ent;
    526       auxmap_L1[1].base = t_base;
    527       auxmap_L1[1].ent  = t_ent;
    528       return auxmap_L1[0].ent;
    529    }
    530 
    531    n_auxmap_L1_searches++;
    532 
    533    for (i = 0; i < N_AUXMAP_L1; i++) {
    534       if (auxmap_L1[i].base == a) {
    535          break;
    536       }
    537    }
    538    tl_assert(i >= 0 && i <= N_AUXMAP_L1);
    539 
    540    n_auxmap_L1_cmps += (ULong)(i+1);
    541 
    542    if (i < N_AUXMAP_L1) {
    543       if (i > 0) {
    544          Addr       t_base = auxmap_L1[i-1].base;
    545          AuxMapEnt* t_ent  = auxmap_L1[i-1].ent;
    546          auxmap_L1[i-1].base = auxmap_L1[i-0].base;
    547          auxmap_L1[i-1].ent  = auxmap_L1[i-0].ent;
    548          auxmap_L1[i-0].base = t_base;
    549          auxmap_L1[i-0].ent  = t_ent;
    550          i--;
    551       }
    552       return auxmap_L1[i].ent;
    553    }
    554 
    555    n_auxmap_L2_searches++;
    556 
    557    /* First see if we already have it. */
    558    key.base = a;
    559    key.sm   = 0;
    560 
    561    res = VG_(OSetGen_Lookup)(auxmap_L2, &key);
    562    if (res)
    563       insert_into_auxmap_L1_at( AUXMAP_L1_INSERT_IX, res );
    564    return res;
    565 }
    566 
    567 static AuxMapEnt* find_or_alloc_in_auxmap ( Addr a )
    568 {
    569    AuxMapEnt *nyu, *res;
    570 
    571    /* First see if we already have it. */
    572    res = maybe_find_in_auxmap( a );
    573    if (LIKELY(res))
    574       return res;
    575 
     576    /* Ok, there's no entry in the auxiliary primary map, so we'll
     577       have to allocate one. */
    578    a &= ~(Addr)0xFFFF;
    579 
    580    nyu = (AuxMapEnt*) VG_(OSetGen_AllocNode)( auxmap_L2, sizeof(AuxMapEnt) );
    581    tl_assert(nyu);
    582    nyu->base = a;
    583    nyu->sm   = &sm_distinguished[SM_DIST_NOACCESS];
    584    VG_(OSetGen_Insert)( auxmap_L2, nyu );
    585    insert_into_auxmap_L1_at( AUXMAP_L1_INSERT_IX, nyu );
    586    n_auxmap_L2_nodes++;
    587    return nyu;
    588 }
    589 
    590 /* --------------- SecMap fundamentals --------------- */
    591 
    592 // In all these, 'low' means it's definitely in the main primary map,
    593 // 'high' means it's definitely in the auxiliary table.
    594 
    595 static INLINE SecMap** get_secmap_low_ptr ( Addr a )
    596 {
    597    UWord pm_off = a >> 16;
    598 #  if VG_DEBUG_MEMORY >= 1
    599    tl_assert(pm_off < N_PRIMARY_MAP);
    600 #  endif
    601    return &primary_map[ pm_off ];
    602 }
    603 
    604 static INLINE SecMap** get_secmap_high_ptr ( Addr a )
    605 {
    606    AuxMapEnt* am = find_or_alloc_in_auxmap(a);
    607    return &am->sm;
    608 }
    609 
    610 static INLINE SecMap** get_secmap_ptr ( Addr a )
    611 {
    612    return ( a <= MAX_PRIMARY_ADDRESS
    613           ? get_secmap_low_ptr(a)
    614           : get_secmap_high_ptr(a));
    615 }
    616 
    617 static INLINE SecMap* get_secmap_for_reading_low ( Addr a )
    618 {
    619    return *get_secmap_low_ptr(a);
    620 }
    621 
    622 static INLINE SecMap* get_secmap_for_reading_high ( Addr a )
    623 {
    624    return *get_secmap_high_ptr(a);
    625 }
    626 
    627 static INLINE SecMap* get_secmap_for_writing_low(Addr a)
    628 {
    629    SecMap** p = get_secmap_low_ptr(a);
    630    if (UNLIKELY(is_distinguished_sm(*p)))
    631       *p = copy_for_writing(*p);
    632    return *p;
    633 }
    634 
    635 static INLINE SecMap* get_secmap_for_writing_high ( Addr a )
    636 {
    637    SecMap** p = get_secmap_high_ptr(a);
    638    if (UNLIKELY(is_distinguished_sm(*p)))
    639       *p = copy_for_writing(*p);
    640    return *p;
    641 }
    642 
    643 /* Produce the secmap for 'a', either from the primary map or by
    644    ensuring there is an entry for it in the aux primary map.  The
    645    secmap may be a distinguished one as the caller will only want to
    646    be able to read it.
    647 */
    648 static INLINE SecMap* get_secmap_for_reading ( Addr a )
    649 {
    650    return ( a <= MAX_PRIMARY_ADDRESS
    651           ? get_secmap_for_reading_low (a)
    652           : get_secmap_for_reading_high(a) );
    653 }
    654 
    655 /* Produce the secmap for 'a', either from the primary map or by
    656    ensuring there is an entry for it in the aux primary map.  The
    657    secmap may not be a distinguished one, since the caller will want
    658    to be able to write it.  If it is a distinguished secondary, make a
    659    writable copy of it, install it, and return the copy instead.  (COW
    660    semantics).
    661 */
    662 static INLINE SecMap* get_secmap_for_writing ( Addr a )
    663 {
    664    return ( a <= MAX_PRIMARY_ADDRESS
    665           ? get_secmap_for_writing_low (a)
    666           : get_secmap_for_writing_high(a) );
    667 }
    668 
    669 /* If 'a' has a SecMap, produce it.  Else produce NULL.  But don't
    670    allocate one if one doesn't already exist.  This is used by the
    671    leak checker.
    672 */
    673 static SecMap* maybe_get_secmap_for ( Addr a )
    674 {
    675    if (a <= MAX_PRIMARY_ADDRESS) {
    676       return get_secmap_for_reading_low(a);
    677    } else {
    678       AuxMapEnt* am = maybe_find_in_auxmap(a);
    679       return am ? am->sm : NULL;
    680    }
    681 }
    682 
    683 /* --------------- Fundamental functions --------------- */
    684 
    685 static INLINE
    686 void insert_vabits2_into_vabits8 ( Addr a, UChar vabits2, UChar* vabits8 )
    687 {
    688    UInt shift =  (a & 3)  << 1;        // shift by 0, 2, 4, or 6
    689    *vabits8  &= ~(0x3     << shift);   // mask out the two old bits
    690    *vabits8  |=  (vabits2 << shift);   // mask  in the two new bits
    691 }
    692 
    693 static INLINE
    694 void insert_vabits4_into_vabits8 ( Addr a, UChar vabits4, UChar* vabits8 )
    695 {
    696    UInt shift;
    697    tl_assert(VG_IS_2_ALIGNED(a));      // Must be 2-aligned
    698    shift     =  (a & 2)   << 1;        // shift by 0 or 4
    699    *vabits8 &= ~(0xf      << shift);   // mask out the four old bits
    700    *vabits8 |=  (vabits4 << shift);    // mask  in the four new bits
    701 }
    702 
    703 static INLINE
    704 UChar extract_vabits2_from_vabits8 ( Addr a, UChar vabits8 )
    705 {
    706    UInt shift = (a & 3) << 1;          // shift by 0, 2, 4, or 6
    707    vabits8 >>= shift;                  // shift the two bits to the bottom
    708    return 0x3 & vabits8;               // mask out the rest
    709 }
    710 
    711 static INLINE
    712 UChar extract_vabits4_from_vabits8 ( Addr a, UChar vabits8 )
    713 {
    714    UInt shift;
    715    tl_assert(VG_IS_2_ALIGNED(a));      // Must be 2-aligned
    716    shift = (a & 2) << 1;               // shift by 0 or 4
    717    vabits8 >>= shift;                  // shift the four bits to the bottom
    718    return 0xf & vabits8;               // mask out the rest
    719 }
    720 
    721 // Note that these four are only used in slow cases.  The fast cases do
    722 // clever things like combine the auxmap check (in
    723 // get_secmap_{read,writ}able) with alignment checks.
    724 
    725 // *** WARNING! ***
    726 // Any time this function is called, if it is possible that vabits2
    727 // is equal to VA_BITS2_PARTDEFINED, then the corresponding entry in the
    728 // sec-V-bits table must also be set!
    729 static INLINE
    730 void set_vabits2 ( Addr a, UChar vabits2 )
    731 {
    732    SecMap* sm       = get_secmap_for_writing(a);
    733    UWord   sm_off   = SM_OFF(a);
    734    insert_vabits2_into_vabits8( a, vabits2, &(sm->vabits8[sm_off]) );
    735 }
    736 
    737 static INLINE
    738 UChar get_vabits2 ( Addr a )
    739 {
    740    SecMap* sm       = get_secmap_for_reading(a);
    741    UWord   sm_off   = SM_OFF(a);
    742    UChar   vabits8  = sm->vabits8[sm_off];
    743    return extract_vabits2_from_vabits8(a, vabits8);
    744 }
    745 
    746 // *** WARNING! ***
    747 // Any time this function is called, if it is possible that any of the
    748 // 4 2-bit fields in vabits8 are equal to VA_BITS2_PARTDEFINED, then the
    749 // corresponding entry(s) in the sec-V-bits table must also be set!
    750 static INLINE
    751 UChar get_vabits8_for_aligned_word32 ( Addr a )
    752 {
    753    SecMap* sm       = get_secmap_for_reading(a);
    754    UWord   sm_off   = SM_OFF(a);
    755    UChar   vabits8  = sm->vabits8[sm_off];
    756    return vabits8;
    757 }
    758 
    759 static INLINE
    760 void set_vabits8_for_aligned_word32 ( Addr a, UChar vabits8 )
    761 {
    762    SecMap* sm       = get_secmap_for_writing(a);
    763    UWord   sm_off   = SM_OFF(a);
    764    sm->vabits8[sm_off] = vabits8;
    765 }
    766 
    767 
    768 // Forward declarations
    769 static UWord get_sec_vbits8(Addr a);
    770 static void  set_sec_vbits8(Addr a, UWord vbits8);
    771 
    772 // Returns False if there was an addressability error.
    773 static INLINE
    774 Bool set_vbits8 ( Addr a, UChar vbits8 )
    775 {
    776    Bool  ok      = True;
    777    UChar vabits2 = get_vabits2(a);
    778    if ( VA_BITS2_NOACCESS != vabits2 ) {
    779       // Addressable.  Convert in-register format to in-memory format.
    780       // Also remove any existing sec V bit entry for the byte if no
    781       // longer necessary.
    782       if      ( V_BITS8_DEFINED   == vbits8 ) { vabits2 = VA_BITS2_DEFINED;   }
    783       else if ( V_BITS8_UNDEFINED == vbits8 ) { vabits2 = VA_BITS2_UNDEFINED; }
    784       else                                    { vabits2 = VA_BITS2_PARTDEFINED;
    785                                                 set_sec_vbits8(a, vbits8);  }
    786       set_vabits2(a, vabits2);
    787 
    788    } else {
    789       // Unaddressable!  Do nothing -- when writing to unaddressable
    790       // memory it acts as a black hole, and the V bits can never be seen
    791       // again.  So we don't have to write them at all.
    792       ok = False;
    793    }
    794    return ok;
    795 }
    796 
    797 // Returns False if there was an addressability error.  In that case, we put
    798 // all defined bits into vbits8.
    799 static INLINE
    800 Bool get_vbits8 ( Addr a, UChar* vbits8 )
    801 {
    802    Bool  ok      = True;
    803    UChar vabits2 = get_vabits2(a);
    804 
    805    // Convert the in-memory format to in-register format.
    806    if      ( VA_BITS2_DEFINED   == vabits2 ) { *vbits8 = V_BITS8_DEFINED;   }
    807    else if ( VA_BITS2_UNDEFINED == vabits2 ) { *vbits8 = V_BITS8_UNDEFINED; }
    808    else if ( VA_BITS2_NOACCESS  == vabits2 ) {
    809       *vbits8 = V_BITS8_DEFINED;    // Make V bits defined!
    810       ok = False;
    811    } else {
    812       tl_assert( VA_BITS2_PARTDEFINED == vabits2 );
    813       *vbits8 = get_sec_vbits8(a);
    814    }
    815    return ok;
    816 }
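/* Worked example of the above conversion (sketch only): suppose the
   client writes a byte whose low 4 bits are undefined, ie. in-register
   vbits8 == 0x0F (1 = undefined, 0 = defined).  That is neither
   V_BITS8_DEFINED (0x00) nor V_BITS8_UNDEFINED (0xFF), so: */
#if 0
   set_vbits8(a, 0x0F);        /* -> VA_BITS2_PARTDEFINED in the compressed
                                     map, plus a sec-V-bits entry for 'a' */
   UChar v;
   get_vbits8(a, &v);          /* v == 0x0F again (if 'a' is addressable) */
#endif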
    817 
    818 
    819 /* --------------- Secondary V bit table ------------ */
    820 
    821 // This table holds the full V bit pattern for partially-defined bytes
    822 // (PDBs) that are represented by VA_BITS2_PARTDEFINED in the main shadow
    823 // memory.
    824 //
    825 // Note: the nodes in this table can become stale.  Eg. if you write a PDB,
    826 // then overwrite the same address with a fully defined byte, the sec-V-bit
    827 // node will not necessarily be removed.  This is because checking for
    828 // whether removal is necessary would slow down the fast paths.
    829 //
    830 // To avoid the stale nodes building up too much, we periodically (once the
    831 // table reaches a certain size) garbage collect (GC) the table by
     832 // traversing it and evicting any nodes that no longer contain any PDBs.
    833 // If more than a certain proportion of nodes survived, we increase the
    834 // table size so that GCs occur less often.
    835 //
    836 // This policy is designed to avoid bad table bloat in the worst case where
    837 // a program creates huge numbers of stale PDBs -- we would get this bloat
    838 // if we had no GC -- while handling well the case where a node becomes
    839 // stale but shortly afterwards is rewritten with a PDB and so becomes
     840 // non-stale again (which happens quite often, eg. in perf/bz2).  If we
     841 // removed all stale nodes as soon as possible, we would just end up
     842 // re-adding a lot of them later.  The "sufficiently stale" approach avoids
    843 // this.  (If a program has many live PDBs, performance will just suck,
    844 // there's no way around that.)
    845 //
    846 // Further comments, JRS 14 Feb 2012.  It turns out that the policy of
    847 // holding on to stale entries for 2 GCs before discarding them can lead
    848 // to massive space leaks.  So we're changing to an arrangement where
    849 // lines are evicted as soon as they are observed to be stale during a
    850 // GC.  This also has a side benefit of allowing the sufficiently_stale
    851 // field to be removed from the SecVBitNode struct, reducing its size by
    852 // 8 bytes, which is a substantial space saving considering that the
    853 // struct was previously 32 or so bytes, on a 64 bit target.
    854 //
    855 // In order to try and mitigate the problem that the "sufficiently stale"
    856 // heuristic was designed to avoid, the table size is allowed to drift
    857 // up ("DRIFTUP") slowly to 80000, even if the residency is low.  This
    858 // means that nodes will exist in the table longer on average, and hopefully
    859 // will be deleted and re-added less frequently.
    860 //
    861 // The previous scaling up mechanism (now called STEPUP) is retained:
    862 // if residency exceeds 50%, the table is scaled up, although by a
    863 // factor sqrt(2) rather than 2 as before.  This effectively doubles the
     864 // frequency of GCs when there are many PDBs and reduces the tendency of
    865 // stale PDBs to reside for long periods in the table.
    866 
    867 static OSet* secVBitTable;
    868 
    869 // Stats
    870 static ULong sec_vbits_new_nodes = 0;
    871 static ULong sec_vbits_updates   = 0;
    872 
    873 // This must be a power of two;  this is checked in mc_pre_clo_init().
    874 // The size chosen here is a trade-off:  if the nodes are bigger (ie. cover
    875 // a larger address range) they take more space but we can get multiple
    876 // partially-defined bytes in one if they are close to each other, reducing
    877 // the number of total nodes.  In practice sometimes they are clustered (eg.
    878 // perf/bz2 repeatedly writes then reads more than 20,000 in a contiguous
    879 // row), but often not.  So we choose something intermediate.
    880 #define BYTES_PER_SEC_VBIT_NODE     16
    881 
    882 // We make the table bigger by a factor of STEPUP_GROWTH_FACTOR if
    883 // more than this many nodes survive a GC.
    884 #define STEPUP_SURVIVOR_PROPORTION  0.5
    885 #define STEPUP_GROWTH_FACTOR        1.414213562
    886 
    887 // If the above heuristic doesn't apply, then we may make the table
    888 // slightly bigger, by a factor of DRIFTUP_GROWTH_FACTOR, if more than
    889 // this many nodes survive a GC, _and_ the total table size does
    890 // not exceed a fixed limit.  The numbers are somewhat arbitrary, but
    891 // work tolerably well on long Firefox runs.  The scaleup ratio of 1.5%
     892 // effectively, although gradually, reduces residency and increases the time
    893 // between GCs for programs with small numbers of PDBs.  The 80000 limit
    894 // effectively limits the table size to around 2MB for programs with
    895 // small numbers of PDBs, whilst giving a reasonably long lifetime to
    896 // entries, to try and reduce the costs resulting from deleting and
    897 // re-adding of entries.
    898 #define DRIFTUP_SURVIVOR_PROPORTION 0.15
    899 #define DRIFTUP_GROWTH_FACTOR       1.015
    900 #define DRIFTUP_MAX_SIZE            80000
    901 
    902 // We GC the table when it gets this many nodes in it, ie. it's effectively
    903 // the table size.  It can change.
    904 static Int  secVBitLimit = 1000;
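/* Worked example of the sizing policy above (numbers only): with
   secVBitLimit at its initial value of 1000, a GC is triggered when the
   table reaches 1000 nodes.  If more than 500 nodes survive
   (STEPUP_SURVIVOR_PROPORTION), the limit becomes ~1414 (x1.414).
   Otherwise, if more than 150 survive (DRIFTUP_SURVIVOR_PROPORTION) and
   the limit is still below 80000, it becomes 1015 (x1.015). */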
    905 
    906 // The number of GCs done, used to age sec-V-bit nodes for eviction.
    907 // Because it's unsigned, wrapping doesn't matter -- the right answer will
    908 // come out anyway.
    909 static UInt GCs_done = 0;
    910 
    911 typedef
    912    struct {
    913       Addr  a;
    914       UChar vbits8[BYTES_PER_SEC_VBIT_NODE];
    915    }
    916    SecVBitNode;
    917 
    918 static OSet* createSecVBitTable(void)
    919 {
    920    OSet* newSecVBitTable;
    921    newSecVBitTable = VG_(OSetGen_Create_With_Pool)
    922       ( offsetof(SecVBitNode, a),
    923         NULL, // use fast comparisons
    924         VG_(malloc), "mc.cSVT.1 (sec VBit table)",
    925         VG_(free),
    926         1000,
    927         sizeof(SecVBitNode));
    928    return newSecVBitTable;
    929 }
    930 
    931 static void gcSecVBitTable(void)
    932 {
    933    OSet*        secVBitTable2;
    934    SecVBitNode* n;
    935    Int          i, n_nodes = 0, n_survivors = 0;
    936 
    937    GCs_done++;
    938 
    939    // Create the new table.
    940    secVBitTable2 = createSecVBitTable();
    941 
    942    // Traverse the table, moving fresh nodes into the new table.
    943    VG_(OSetGen_ResetIter)(secVBitTable);
    944    while ( (n = VG_(OSetGen_Next)(secVBitTable)) ) {
    945       // Keep node if any of its bytes are non-stale.  Using
    946       // get_vabits2() for the lookup is not very efficient, but I don't
    947       // think it matters.
    948       for (i = 0; i < BYTES_PER_SEC_VBIT_NODE; i++) {
    949          if (VA_BITS2_PARTDEFINED == get_vabits2(n->a + i)) {
    950             // Found a non-stale byte, so keep =>
    951             // Insert a copy of the node into the new table.
    952             SecVBitNode* n2 =
    953                VG_(OSetGen_AllocNode)(secVBitTable2, sizeof(SecVBitNode));
    954             *n2 = *n;
    955             VG_(OSetGen_Insert)(secVBitTable2, n2);
    956             break;
    957          }
    958       }
    959    }
    960 
    961    // Get the before and after sizes.
    962    n_nodes     = VG_(OSetGen_Size)(secVBitTable);
    963    n_survivors = VG_(OSetGen_Size)(secVBitTable2);
    964 
    965    // Destroy the old table, and put the new one in its place.
    966    VG_(OSetGen_Destroy)(secVBitTable);
    967    secVBitTable = secVBitTable2;
    968 
    969    if (VG_(clo_verbosity) > 1) {
    970       HChar percbuf[7];
    971       VG_(percentify)(n_survivors, n_nodes, 1, 6, percbuf);
    972       VG_(message)(Vg_DebugMsg, "memcheck GC: %d nodes, %d survivors (%s)\n",
    973                    n_nodes, n_survivors, percbuf);
    974    }
    975 
    976    // Increase table size if necessary.
    977    if ((Double)n_survivors
    978        > ((Double)secVBitLimit * STEPUP_SURVIVOR_PROPORTION)) {
    979       secVBitLimit = (Int)((Double)secVBitLimit * (Double)STEPUP_GROWTH_FACTOR);
    980       if (VG_(clo_verbosity) > 1)
    981          VG_(message)(Vg_DebugMsg,
    982                       "memcheck GC: %d new table size (stepup)\n",
    983                       secVBitLimit);
    984    }
    985    else
    986    if (secVBitLimit < DRIFTUP_MAX_SIZE
    987        && (Double)n_survivors
    988           > ((Double)secVBitLimit * DRIFTUP_SURVIVOR_PROPORTION)) {
    989       secVBitLimit = (Int)((Double)secVBitLimit * (Double)DRIFTUP_GROWTH_FACTOR);
    990       if (VG_(clo_verbosity) > 1)
    991          VG_(message)(Vg_DebugMsg,
    992                       "memcheck GC: %d new table size (driftup)\n",
    993                       secVBitLimit);
    994    }
    995 }
    996 
    997 static UWord get_sec_vbits8(Addr a)
    998 {
    999    Addr         aAligned = VG_ROUNDDN(a, BYTES_PER_SEC_VBIT_NODE);
   1000    Int          amod     = a % BYTES_PER_SEC_VBIT_NODE;
   1001    SecVBitNode* n        = VG_(OSetGen_Lookup)(secVBitTable, &aAligned);
   1002    UChar        vbits8;
   1003    tl_assert2(n, "get_sec_vbits8: no node for address %p (%p)\n", aAligned, a);
   1004    // Shouldn't be fully defined or fully undefined -- those cases shouldn't
   1005    // make it to the secondary V bits table.
   1006    vbits8 = n->vbits8[amod];
   1007    tl_assert(V_BITS8_DEFINED != vbits8 && V_BITS8_UNDEFINED != vbits8);
   1008    return vbits8;
   1009 }
   1010 
   1011 static void set_sec_vbits8(Addr a, UWord vbits8)
   1012 {
   1013    Addr         aAligned = VG_ROUNDDN(a, BYTES_PER_SEC_VBIT_NODE);
   1014    Int          i, amod  = a % BYTES_PER_SEC_VBIT_NODE;
   1015    SecVBitNode* n        = VG_(OSetGen_Lookup)(secVBitTable, &aAligned);
   1016    // Shouldn't be fully defined or fully undefined -- those cases shouldn't
   1017    // make it to the secondary V bits table.
   1018    tl_assert(V_BITS8_DEFINED != vbits8 && V_BITS8_UNDEFINED != vbits8);
   1019    if (n) {
   1020       n->vbits8[amod] = vbits8;     // update
   1021       sec_vbits_updates++;
   1022    } else {
   1023       // Do a table GC if necessary.  Nb: do this before creating and
   1024       // inserting the new node, to avoid erroneously GC'ing the new node.
   1025       if (secVBitLimit == VG_(OSetGen_Size)(secVBitTable)) {
   1026          gcSecVBitTable();
   1027       }
   1028 
   1029       // New node:  assign the specific byte, make the rest invalid (they
   1030       // should never be read as-is, but be cautious).
   1031       n = VG_(OSetGen_AllocNode)(secVBitTable, sizeof(SecVBitNode));
   1032       n->a            = aAligned;
   1033       for (i = 0; i < BYTES_PER_SEC_VBIT_NODE; i++) {
   1034          n->vbits8[i] = V_BITS8_UNDEFINED;
   1035       }
   1036       n->vbits8[amod] = vbits8;
   1037 
   1038       // Insert the new node.
   1039       VG_(OSetGen_Insert)(secVBitTable, n);
   1040       sec_vbits_new_nodes++;
   1041 
   1042       n_secVBit_nodes = VG_(OSetGen_Size)(secVBitTable);
   1043       if (n_secVBit_nodes > max_secVBit_nodes)
   1044          max_secVBit_nodes = n_secVBit_nodes;
   1045    }
   1046 }
   1047 
   1048 /* --------------- Endianness helpers --------------- */
   1049 
    1050 /* Returns the offset in memory of the byteno-th least significant byte
   1051    in a wordszB-sized word, given the specified endianness. */
   1052 static INLINE UWord byte_offset_w ( UWord wordszB, Bool bigendian,
   1053                                     UWord byteno ) {
   1054    return bigendian ? (wordszB-1-byteno) : byteno;
   1055 }
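/* Examples (sketch): for an 8-byte word,
      byte_offset_w(8, False, 0) == 0   (little-endian: LSB is byte 0)
      byte_offset_w(8, True,  0) == 7   (big-endian:    LSB is byte 7)
      byte_offset_w(8, True,  7) == 0   (big-endian:    MSB is byte 0)
   The slow-path loaders below walk byteno from high to low and shift the
   result left as they go, so the byte visited first ends up in the most
   significant position of the assembled value. */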
   1056 
   1057 
   1058 /* --------------- Ignored address ranges --------------- */
   1059 
   1060 /* Denotes the address-error-reportability status for address ranges:
   1061    IAR_NotIgnored:  the usual case -- report errors in this range
   1062    IAR_CommandLine: don't report errors -- from command line setting
   1063    IAR_ClientReq:   don't report errors -- from client request
   1064 */
   1065 typedef
   1066    enum { IAR_INVALID=99,
   1067           IAR_NotIgnored,
   1068           IAR_CommandLine,
   1069           IAR_ClientReq }
   1070    IARKind;
   1071 
   1072 static const HChar* showIARKind ( IARKind iark )
   1073 {
   1074    switch (iark) {
   1075       case IAR_INVALID:     return "INVALID";
   1076       case IAR_NotIgnored:  return "NotIgnored";
   1077       case IAR_CommandLine: return "CommandLine";
   1078       case IAR_ClientReq:   return "ClientReq";
   1079       default:              return "???";
   1080    }
   1081 }
   1082 
   1083 // RangeMap<IARKind>
   1084 static RangeMap* gIgnoredAddressRanges = NULL;
   1085 
   1086 static void init_gIgnoredAddressRanges ( void )
   1087 {
   1088    if (LIKELY(gIgnoredAddressRanges != NULL))
   1089       return;
   1090    gIgnoredAddressRanges = VG_(newRangeMap)( VG_(malloc), "mc.igIAR.1",
   1091                                              VG_(free), IAR_NotIgnored );
   1092    tl_assert(gIgnoredAddressRanges != NULL);
   1093 }
   1094 
   1095 INLINE Bool MC_(in_ignored_range) ( Addr a )
   1096 {
   1097    if (LIKELY(gIgnoredAddressRanges == NULL))
   1098       return False;
   1099    UWord how     = IAR_INVALID;
   1100    UWord key_min = ~(UWord)0;
   1101    UWord key_max =  (UWord)0;
   1102    VG_(lookupRangeMap)(&key_min, &key_max, &how, gIgnoredAddressRanges, a);
   1103    tl_assert(key_min <= a && a <= key_max);
   1104    switch (how) {
   1105       case IAR_NotIgnored:  return False;
   1106       case IAR_CommandLine: return True;
   1107       case IAR_ClientReq:   return True;
   1108       default: break; /* invalid */
   1109    }
    1110    VG_(tool_panic)("MC_(in_ignored_range)");
   1111    /*NOTREACHED*/
   1112 }
   1113 
    1114 /* Parse two Addrs separated by a dash, or fail. */
   1115 
   1116 static Bool parse_range ( const HChar** ppc, Addr* result1, Addr* result2 )
   1117 {
   1118    Bool ok = VG_(parse_Addr) (ppc, result1);
   1119    if (!ok)
   1120       return False;
   1121    if (**ppc != '-')
   1122       return False;
   1123    (*ppc)++;
   1124    ok = VG_(parse_Addr) (ppc, result2);
   1125    if (!ok)
   1126       return False;
   1127    return True;
   1128 }
   1129 
    1130 /* Parse a set of ranges separated by commas, or fail.  If they are
    1131    valid, add them to the global set of ignored
    1132    ranges. */
   1133 static Bool parse_ignore_ranges ( const HChar* str0 )
   1134 {
   1135    init_gIgnoredAddressRanges();
   1136    const HChar*  str = str0;
   1137    const HChar** ppc = &str;
   1138    while (1) {
   1139       Addr start = ~(Addr)0;
   1140       Addr end   = (Addr)0;
   1141       Bool ok    = parse_range(ppc, &start, &end);
   1142       if (!ok)
   1143          return False;
   1144       if (start > end)
   1145          return False;
   1146       VG_(bindRangeMap)( gIgnoredAddressRanges, start, end, IAR_CommandLine );
   1147       if (**ppc == 0)
   1148          return True;
   1149       if (**ppc != ',')
   1150          return False;
   1151       (*ppc)++;
   1152    }
   1153    /*NOTREACHED*/
   1154    return False;
   1155 }
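/* Example (sketch): with the command-line option

      --ignore-ranges=0x11000000-0x11ffffff,0x22000000-0x22ffffff

   this function would be handed the text after the '=', and would bind
   both ranges to IAR_CommandLine in gIgnoredAddressRanges. */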
   1156 
   1157 /* Add or remove [start, +len) from the set of ignored ranges. */
   1158 static Bool modify_ignore_ranges ( Bool addRange, Addr start, Addr len )
   1159 {
   1160    init_gIgnoredAddressRanges();
   1161    const Bool verbose = (VG_(clo_verbosity) > 1);
   1162    if (len == 0) {
   1163       return False;
   1164    }
   1165    if (addRange) {
   1166       VG_(bindRangeMap)(gIgnoredAddressRanges,
   1167                         start, start+len-1, IAR_ClientReq);
   1168       if (verbose)
   1169          VG_(dmsg)("memcheck: modify_ignore_ranges: add %p %p\n",
   1170                    (void*)start, (void*)(start+len-1));
   1171    } else {
   1172       VG_(bindRangeMap)(gIgnoredAddressRanges,
   1173                         start, start+len-1, IAR_NotIgnored);
   1174       if (verbose)
   1175          VG_(dmsg)("memcheck: modify_ignore_ranges: del %p %p\n",
   1176                    (void*)start, (void*)(start+len-1));
   1177    }
   1178    if (verbose) {
   1179       VG_(dmsg)("memcheck:   now have %ld ranges:\n",
   1180                 VG_(sizeRangeMap)(gIgnoredAddressRanges));
   1181       Word i;
   1182       for (i = 0; i < VG_(sizeRangeMap)(gIgnoredAddressRanges); i++) {
   1183          UWord val     = IAR_INVALID;
   1184          UWord key_min = ~(UWord)0;
   1185          UWord key_max = (UWord)0;
   1186          VG_(indexRangeMap)( &key_min, &key_max, &val,
   1187                              gIgnoredAddressRanges, i );
   1188          VG_(dmsg)("memcheck:      [%ld]  %016llx-%016llx  %s\n",
   1189                    i, (ULong)key_min, (ULong)key_max, showIARKind(val));
   1190       }
   1191    }
   1192    return True;
   1193 }
   1194 
   1195 
   1196 /* --------------- Load/store slow cases. --------------- */
   1197 
   1198 static
   1199 __attribute__((noinline))
   1200 void mc_LOADV_128_or_256_slow ( /*OUT*/ULong* res,
   1201                                 Addr a, SizeT nBits, Bool bigendian )
   1202 {
   1203    ULong  pessim[4];     /* only used when p-l-ok=yes */
   1204    SSizeT szB            = nBits / 8;
   1205    SSizeT szL            = szB / 8;  /* Size in Longs (64-bit units) */
   1206    SSizeT i, j;          /* Must be signed. */
   1207    SizeT  n_addrs_bad = 0;
   1208    Addr   ai;
   1209    UChar  vbits8;
   1210    Bool   ok;
   1211 
   1212    /* Code below assumes load size is a power of two and at least 64
   1213       bits. */
   1214    tl_assert((szB & (szB-1)) == 0 && szL > 0);
   1215 
   1216    /* If this triggers, you probably just need to increase the size of
   1217       the pessim array. */
   1218    tl_assert(szL <= sizeof(pessim) / sizeof(pessim[0]));
   1219 
   1220    for (j = 0; j < szL; j++) {
   1221       pessim[j] = V_BITS64_DEFINED;
   1222       res[j] = V_BITS64_UNDEFINED;
   1223    }
   1224 
   1225    /* Make up a result V word, which contains the loaded data for
   1226       valid addresses and Defined for invalid addresses.  Iterate over
   1227       the bytes in the word, from the most significant down to the
   1228       least.  The vbits to return are calculated into vbits128.  Also
    1229       least.  The vbits to return are calculated into res[].  Also
   1230       --partial-loads-ok=yes.  n_addrs_bad is redundant (the relevant
   1231       info can be gleaned from the pessim array) but is used as a
   1232       cross-check. */
   1233    for (j = szL-1; j >= 0; j--) {
   1234       ULong vbits64    = V_BITS64_UNDEFINED;
   1235       ULong pessim64   = V_BITS64_DEFINED;
   1236       UWord long_index = byte_offset_w(szL, bigendian, j);
   1237       for (i = 8-1; i >= 0; i--) {
   1238          PROF_EVENT(29, "mc_LOADV_128_or_256_slow(loop)");
   1239          ai = a + 8*long_index + byte_offset_w(8, bigendian, i);
   1240          ok = get_vbits8(ai, &vbits8);
   1241          vbits64 <<= 8;
   1242          vbits64 |= vbits8;
   1243          if (!ok) n_addrs_bad++;
   1244          pessim64 <<= 8;
   1245          pessim64 |= (ok ? V_BITS8_DEFINED : V_BITS8_UNDEFINED);
   1246       }
   1247       res[long_index] = vbits64;
   1248       pessim[long_index] = pessim64;
   1249    }
   1250 
   1251    /* In the common case, all the addresses involved are valid, so we
   1252       just return the computed V bits and have done. */
   1253    if (LIKELY(n_addrs_bad == 0))
   1254       return;
   1255 
   1256    /* If there's no possibility of getting a partial-loads-ok
   1257       exemption, report the error and quit. */
   1258    if (!MC_(clo_partial_loads_ok)) {
   1259       MC_(record_address_error)( VG_(get_running_tid)(), a, szB, False );
   1260       return;
   1261    }
   1262 
    1263    /* The partial-loads-ok exemption might apply.  Find out if it
   1264       does.  If so, don't report an addressing error, but do return
   1265       Undefined for the bytes that are out of range, so as to avoid
   1266       false negatives.  If it doesn't apply, just report an addressing
   1267       error in the usual way. */
   1268 
   1269    /* Some code steps along byte strings in aligned chunks
   1270       even when there is only a partially defined word at the end (eg,
   1271       optimised strlen).  This is allowed by the memory model of
   1272       modern machines, since an aligned load cannot span two pages and
   1273       thus cannot "partially fault".
   1274 
    1275       Therefore, a load from a partially-addressable place is allowed
   1276       if all of the following hold:
   1277       - the command-line flag is set [by default, it isn't]
   1278       - it's an aligned load
   1279       - at least one of the addresses in the word *is* valid
   1280 
   1281       Since this suppresses the addressing error, we avoid false
   1282       negatives by marking bytes undefined when they come from an
   1283       invalid address.
   1284    */
   1285 
   1286    /* "at least one of the addresses is invalid" */
   1287    ok = False;
   1288    for (j = 0; j < szL; j++)
   1289       ok |= pessim[j] != V_BITS8_DEFINED;
   1290    tl_assert(ok);
   1291 
   1292    if (0 == (a & (szB - 1)) && n_addrs_bad < szB) {
   1293       /* Exemption applies.  Use the previously computed pessimising
   1294          value and return the combined result, but don't flag an
   1295          addressing error.  The pessimising value is Defined for valid
   1296          addresses and Undefined for invalid addresses. */
   1297       /* Check the assumption that bitwise OR implements UifU. */
   1298       tl_assert(V_BIT_UNDEFINED == 1 && V_BIT_DEFINED == 0);
   1299       /* (really need "UifU" here...)
   1300          vbits[j] UifU= pessim[j]  (is pessimised by it, iow) */
   1301       for (j = szL-1; j >= 0; j--)
   1302          res[j] |= pessim[j];
   1303       return;
   1304    }
   1305 
   1306    /* Exemption doesn't apply.  Flag an addressing error in the normal
   1307       way. */
   1308    MC_(record_address_error)( VG_(get_running_tid)(), a, szB, False );
   1309 }
   1310 
   1311 
   1312 static
   1313 __attribute__((noinline))
   1314 ULong mc_LOADVn_slow ( Addr a, SizeT nBits, Bool bigendian )
   1315 {
   1316    PROF_EVENT(30, "mc_LOADVn_slow");
   1317 
   1318    /* ------------ BEGIN semi-fast cases ------------ */
   1319    /* These deal quickly-ish with the common auxiliary primary map
   1320       cases on 64-bit platforms.  They are merely a speedup hack and
   1321       can be omitted without loss of correctness/functionality.  Note
   1322       that in both cases the "sizeof(void*) == 8" test causes these
   1323       cases to be folded out by compilers on 32-bit platforms.  These
   1324       are derived from LOADV64 and LOADV32.
   1325    */
   1326    if (LIKELY(sizeof(void*) == 8
   1327                       && nBits == 64 && VG_IS_8_ALIGNED(a))) {
   1328       SecMap* sm       = get_secmap_for_reading(a);
   1329       UWord   sm_off16 = SM_OFF_16(a);
   1330       UWord   vabits16 = ((UShort*)(sm->vabits8))[sm_off16];
   1331       if (LIKELY(vabits16 == VA_BITS16_DEFINED))
   1332          return V_BITS64_DEFINED;
   1333       if (LIKELY(vabits16 == VA_BITS16_UNDEFINED))
   1334          return V_BITS64_UNDEFINED;
   1335       /* else fall into the slow case */
   1336    }
   1337    if (LIKELY(sizeof(void*) == 8
   1338                       && nBits == 32 && VG_IS_4_ALIGNED(a))) {
   1339       SecMap* sm = get_secmap_for_reading(a);
   1340       UWord sm_off = SM_OFF(a);
   1341       UWord vabits8 = sm->vabits8[sm_off];
   1342       if (LIKELY(vabits8 == VA_BITS8_DEFINED))
   1343          return ((UWord)0xFFFFFFFF00000000ULL | (UWord)V_BITS32_DEFINED);
   1344       if (LIKELY(vabits8 == VA_BITS8_UNDEFINED))
   1345          return ((UWord)0xFFFFFFFF00000000ULL | (UWord)V_BITS32_UNDEFINED);
   1346       /* else fall into slow case */
   1347    }
   1348    /* ------------ END semi-fast cases ------------ */
   1349 
   1350    ULong  vbits64     = V_BITS64_UNDEFINED; /* result */
   1351    ULong  pessim64    = V_BITS64_DEFINED;   /* only used when p-l-ok=yes */
   1352    SSizeT szB         = nBits / 8;
   1353    SSizeT i;          /* Must be signed. */
   1354    SizeT  n_addrs_bad = 0;
   1355    Addr   ai;
   1356    UChar  vbits8;
   1357    Bool   ok;
   1358 
   1359    tl_assert(nBits == 64 || nBits == 32 || nBits == 16 || nBits == 8);
   1360 
   1361    /* Make up a 64-bit result V word, which contains the loaded data
   1362       for valid addresses and Defined for invalid addresses.  Iterate
   1363       over the bytes in the word, from the most significant down to
   1364       the least.  The vbits to return are calculated into vbits64.
   1365       Also compute the pessimising value to be used when
   1366       --partial-loads-ok=yes.  n_addrs_bad is redundant (the relevant
   1367       info can be gleaned from pessim64) but is used as a
   1368       cross-check. */
   1369    for (i = szB-1; i >= 0; i--) {
   1370       PROF_EVENT(31, "mc_LOADVn_slow(loop)");
   1371       ai = a + byte_offset_w(szB, bigendian, i);
   1372       ok = get_vbits8(ai, &vbits8);
   1373       vbits64 <<= 8;
   1374       vbits64 |= vbits8;
   1375       if (!ok) n_addrs_bad++;
   1376       pessim64 <<= 8;
   1377       pessim64 |= (ok ? V_BITS8_DEFINED : V_BITS8_UNDEFINED);
   1378    }
   1379 
   1380    /* In the common case, all the addresses involved are valid, so we
   1381       just return the computed V bits and have done. */
   1382    if (LIKELY(n_addrs_bad == 0))
   1383       return vbits64;
   1384 
   1385    /* If there's no possibility of getting a partial-loads-ok
   1386       exemption, report the error and quit. */
   1387    if (!MC_(clo_partial_loads_ok)) {
   1388       MC_(record_address_error)( VG_(get_running_tid)(), a, szB, False );
   1389       return vbits64;
   1390    }
   1391 
   1392    /* The partial-loads-ok exemption might apply.  Find out if it
   1393       does.  If so, don't report an addressing error, but do return
   1394       Undefined for the bytes that are out of range, so as to avoid
   1395       false negatives.  If it doesn't apply, just report an addressing
   1396       error in the usual way. */
   1397 
   1398    /* Some code steps along byte strings in aligned word-sized chunks
   1399       even when there is only a partially defined word at the end (eg,
   1400       optimised strlen).  This is allowed by the memory model of
   1401       modern machines, since an aligned load cannot span two pages and
   1402       thus cannot "partially fault".  Despite such behaviour being
   1403       declared undefined by ANSI C/C++.
   1404 
   1405       Therefore, a load from a partially-addressable place is allowed
   1406       if all of the following hold:
   1407       - the command-line flag is set [by default, it isn't]
   1408       - it's a word-sized, word-aligned load
   1409       - at least one of the addresses in the word *is* valid
   1410 
   1411       Since this suppresses the addressing error, we avoid false
   1412       negatives by marking bytes undefined when they come from an
   1413       invalid address.
   1414    */
   1415 
   1416    /* "at least one of the addresses is invalid" */
   1417    tl_assert(pessim64 != V_BITS64_DEFINED);
   1418 
   1419    if (szB == VG_WORDSIZE && VG_IS_WORD_ALIGNED(a)
   1420        && n_addrs_bad < VG_WORDSIZE) {
   1421       /* Exemption applies.  Use the previously computed pessimising
   1422          value for vbits64 and return the combined result, but don't
   1423          flag an addressing error.  The pessimising value is Defined
   1424          for valid addresses and Undefined for invalid addresses. */
   1425       /* Check the assumption that bitwise OR implements UifU. */
   1426       tl_assert(V_BIT_UNDEFINED == 1 && V_BIT_DEFINED == 0);
   1427       /* (really need "UifU" here...)
   1428          vbits64 UifU= pessim64  (is pessimised by it, iow) */
   1429       vbits64 |= pessim64;
   1430       return vbits64;
   1431    }
   1432 
   1433    /* Also, it appears that gcc generates string-stepping code in
   1434       32-bit chunks on 64-bit platforms.  So, also grant an exception
   1435       for this case.  Note that the first clause of the conditional
   1436       (VG_WORDSIZE == 8) is known at compile time, so the whole clause
   1437       will get folded out in 32-bit builds. */
   1438    if (VG_WORDSIZE == 8
   1439        && VG_IS_4_ALIGNED(a) && nBits == 32 && n_addrs_bad < 4) {
   1440       tl_assert(V_BIT_UNDEFINED == 1 && V_BIT_DEFINED == 0);
   1441       /* (really need "UifU" here...)
   1442          vbits64 UifU= pessim64  (is pessimised by it, iow) */
   1443       vbits64 |= pessim64;
   1444       /* Mark the upper 32 bits as undefined, just to be on the safe
   1445          side. */
   1446       vbits64 |= (((ULong)V_BITS32_UNDEFINED) << 32);
   1447       return vbits64;
   1448    }
   1449 
   1450    /* Exemption doesn't apply.  Flag an addressing error in the normal
   1451       way. */
   1452    MC_(record_address_error)( VG_(get_running_tid)(), a, szB, False );
   1453 
   1454    return vbits64;
   1455 }
   1456 
   1457 
   1458 static
   1459 __attribute__((noinline))
   1460 void mc_STOREVn_slow ( Addr a, SizeT nBits, ULong vbytes, Bool bigendian )
   1461 {
   1462    SizeT szB = nBits / 8;
   1463    SizeT i, n_addrs_bad = 0;
   1464    UChar vbits8;
   1465    Addr  ai;
   1466    Bool  ok;
   1467 
   1468    PROF_EVENT(35, "mc_STOREVn_slow");
   1469 
   1470    /* ------------ BEGIN semi-fast cases ------------ */
   1471    /* These deal quickly-ish with the common auxiliary primary map
   1472       cases on 64-bit platforms.  They are merely a speedup hack and
   1473       can be omitted without loss of correctness/functionality.  Note
   1474       that in both cases the "sizeof(void*) == 8" test causes these
   1475       cases to be folded out by compilers on 32-bit platforms.  The
   1476       logic below is somewhat similar to some cases extensively
   1477       commented in MC_(helperc_STOREV8).
   1478    */
   1479    if (LIKELY(sizeof(void*) == 8
   1480                       && nBits == 64 && VG_IS_8_ALIGNED(a))) {
   1481       SecMap* sm       = get_secmap_for_reading(a);
   1482       UWord   sm_off16 = SM_OFF_16(a);
   1483       UWord   vabits16 = ((UShort*)(sm->vabits8))[sm_off16];
   1484       if (LIKELY( !is_distinguished_sm(sm) &&
   1485                           (VA_BITS16_DEFINED   == vabits16 ||
   1486                            VA_BITS16_UNDEFINED == vabits16) )) {
   1487          /* Handle common case quickly: a is suitably aligned, */
   1488          /* is mapped, and is addressable. */
   1489          // Convert full V-bits in register to compact 2-bit form.
   1490          if (LIKELY(V_BITS64_DEFINED == vbytes)) {
   1491             ((UShort*)(sm->vabits8))[sm_off16] = (UShort)VA_BITS16_DEFINED;
   1492             return;
   1493          } else if (V_BITS64_UNDEFINED == vbytes) {
   1494             ((UShort*)(sm->vabits8))[sm_off16] = (UShort)VA_BITS16_UNDEFINED;
   1495             return;
   1496          }
   1497          /* else fall into the slow case */
   1498       }
   1499       /* else fall into the slow case */
   1500    }
   1501    if (LIKELY(sizeof(void*) == 8
   1502                       && nBits == 32 && VG_IS_4_ALIGNED(a))) {
   1503       SecMap* sm      = get_secmap_for_reading(a);
   1504       UWord   sm_off  = SM_OFF(a);
   1505       UWord   vabits8 = sm->vabits8[sm_off];
   1506       if (LIKELY( !is_distinguished_sm(sm) &&
   1507                           (VA_BITS8_DEFINED   == vabits8 ||
   1508                            VA_BITS8_UNDEFINED == vabits8) )) {
   1509          /* Handle common case quickly: a is suitably aligned, */
   1510          /* is mapped, and is addressable. */
   1511          // Convert full V-bits in register to compact 2-bit form.
   1512          if (LIKELY(V_BITS32_DEFINED == (vbytes & 0xFFFFFFFF))) {
   1513             sm->vabits8[sm_off] = VA_BITS8_DEFINED;
   1514             return;
   1515          } else if (V_BITS32_UNDEFINED == (vbytes & 0xFFFFFFFF)) {
   1516             sm->vabits8[sm_off] = VA_BITS8_UNDEFINED;
   1517             return;
   1518          }
   1519          /* else fall into the slow case */
   1520       }
   1521       /* else fall into the slow case */
   1522    }
   1523    /* ------------ END semi-fast cases ------------ */
   1524 
   1525    tl_assert(nBits == 64 || nBits == 32 || nBits == 16 || nBits == 8);
   1526 
   1527    /* Dump vbytes in memory, iterating from least to most significant
   1528       byte.  At the same time establish addressability of the location. */
   1529    for (i = 0; i < szB; i++) {
   1530       PROF_EVENT(36, "mc_STOREVn_slow(loop)");
   1531       ai     = a + byte_offset_w(szB, bigendian, i);
   1532       vbits8 = vbytes & 0xff;
   1533       ok     = set_vbits8(ai, vbits8);
   1534       if (!ok) n_addrs_bad++;
   1535       vbytes >>= 8;
   1536    }
   1537 
   1538    /* If an address error has happened, report it. */
   1539    if (n_addrs_bad > 0)
   1540       MC_(record_address_error)( VG_(get_running_tid)(), a, szB, True );
   1541 }
   1542 
   1543 
   1544 /*------------------------------------------------------------*/
   1545 /*--- Setting permissions over address ranges.             ---*/
   1546 /*------------------------------------------------------------*/
   1547 
   1548 static void set_address_range_perms ( Addr a, SizeT lenT, UWord vabits16,
   1549                                       UWord dsm_num )
   1550 {
   1551    UWord    sm_off, sm_off16;
   1552    UWord    vabits2 = vabits16 & 0x3;
   1553    SizeT    lenA, lenB, len_to_next_secmap;
   1554    Addr     aNext;
   1555    SecMap*  sm;
   1556    SecMap** sm_ptr;
   1557    SecMap*  example_dsm;
   1558 
   1559    PROF_EVENT(150, "set_address_range_perms");
   1560 
   1561    /* Check the V+A bits make sense. */
   1562    tl_assert(VA_BITS16_NOACCESS  == vabits16 ||
   1563              VA_BITS16_UNDEFINED == vabits16 ||
   1564              VA_BITS16_DEFINED   == vabits16);
   1565 
   1566    // This code should never write PDBs;  ensure this.  (See comment above
   1567    // set_vabits2().)
   1568    tl_assert(VA_BITS2_PARTDEFINED != vabits2);
   1569 
   1570    if (lenT == 0)
   1571       return;
   1572 
   1573    if (lenT > 256 * 1024 * 1024) {
   1574       if (VG_(clo_verbosity) > 0 && !VG_(clo_xml)) {
   1575          const HChar* s = "unknown???";
   1576          if (vabits16 == VA_BITS16_NOACCESS ) s = "noaccess";
   1577          if (vabits16 == VA_BITS16_UNDEFINED) s = "undefined";
   1578          if (vabits16 == VA_BITS16_DEFINED  ) s = "defined";
   1579          VG_(message)(Vg_UserMsg, "Warning: set address range perms: "
   1580                                   "large range [0x%lx, 0x%lx) (%s)\n",
   1581                                   a, a + lenT, s);
   1582       }
   1583    }
   1584 
   1585 #ifndef PERF_FAST_SARP
   1586    /*------------------ debug-only case ------------------ */
   1587    {
   1588       // Endianness doesn't matter here because all bytes are being set to
   1589       // the same value.
   1590       // Nb: We don't have to worry about updating the sec-V-bits table
   1591       // after these set_vabits2() calls because this code never writes
   1592       // VA_BITS2_PARTDEFINED values.
   1593       SizeT i;
   1594       for (i = 0; i < lenT; i++) {
   1595          set_vabits2(a + i, vabits2);
   1596       }
   1597       return;
   1598    }
   1599 #endif
   1600 
   1601    /*------------------ standard handling ------------------ */
   1602 
   1603    /* Get the distinguished secondary that we might want
   1604       to use (part of the space-compression scheme). */
   1605    example_dsm = &sm_distinguished[dsm_num];
   1606 
   1607    // We have to handle ranges covering various combinations of partial and
   1608    // whole sec-maps.  Here is how parts 1, 2 and 3 are used in each case.
   1609    // Cases marked with a '*' are common.
   1610    //
   1611    //   TYPE                                             PARTS USED
   1612    //   ----                                             ----------
   1613    // * one partial sec-map                  (p)         1
   1614    // - one whole sec-map                    (P)         2
   1615    //
   1616    // * two partial sec-maps                 (pp)        1,3
   1617    // - one partial, one whole sec-map       (pP)        1,2
   1618    // - one whole, one partial sec-map       (Pp)        2,3
   1619    // - two whole sec-maps                   (PP)        2,2
   1620    //
   1621    // * one partial, one whole, one partial  (pPp)       1,2,3
   1622    // - one partial, two whole               (pPP)       1,2,2
   1623    // - two whole, one partial               (PPp)       2,2,3
   1624    // - three whole                          (PPP)       2,2,2
   1625    //
   1626    // * one partial, N-2 whole, one partial  (pP...Pp)   1,2...2,3
   1627    // - one partial, N-1 whole               (pP...PP)   1,2...2,2
   1628    // - N-1 whole, one partial               (PP...Pp)   2,2...2,3
   1629    // - N whole                              (PP...PP)   2,2...2,2
   1630 
   1631    // Break up total length (lenT) into two parts:  length in the first
   1632    // sec-map (lenA), and the rest (lenB);   lenT == lenA + lenB.
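           // As a worked example (a sketch, assuming the 64KB SM_SIZE used
           // below): a == 0x1F000 with lenT == 0x23000 is a pPPp case:
           // lenA == 0x1000 (up to the sec-map boundary at 0x20000) and
           // lenB == 0x22000; Part 2 then covers two whole sec-maps
           // (0x20000 bytes) and Part 3 the final 0x2000.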
   1633    aNext = start_of_this_sm(a) + SM_SIZE;
   1634    len_to_next_secmap = aNext - a;
   1635    if ( lenT <= len_to_next_secmap ) {
   1636       // Range entirely within one sec-map.  Covers almost all cases.
   1637       PROF_EVENT(151, "set_address_range_perms-single-secmap");
   1638       lenA = lenT;
   1639       lenB = 0;
   1640    } else if (is_start_of_sm(a)) {
   1641       // Range spans at least one whole sec-map, and starts at the beginning
   1642       // of a sec-map; skip to Part 2.
   1643       PROF_EVENT(152, "set_address_range_perms-startof-secmap");
   1644       lenA = 0;
   1645       lenB = lenT;
   1646       goto part2;
   1647    } else {
   1648       // Range spans two or more sec-maps, first one is partial.
   1649       PROF_EVENT(153, "set_address_range_perms-multiple-secmaps");
   1650       lenA = len_to_next_secmap;
   1651       lenB = lenT - lenA;
   1652    }
   1653 
   1654    //------------------------------------------------------------------------
   1655    // Part 1: Deal with the first sec-map.  Most of the time the range will be
   1656    // entirely within a sec-map and this part alone will suffice.  Also,
   1657    // doing it this way lets us avoid repeatedly testing for the crossing of
   1658    // a sec-map boundary within these loops.
   1659    //------------------------------------------------------------------------
   1660 
   1661    // If it's distinguished, make it undistinguished if necessary.
   1662    sm_ptr = get_secmap_ptr(a);
   1663    if (is_distinguished_sm(*sm_ptr)) {
   1664       if (*sm_ptr == example_dsm) {
   1665          // Sec-map already has the V+A bits that we want, so skip.
   1666          PROF_EVENT(154, "set_address_range_perms-dist-sm1-quick");
   1667          a    = aNext;
   1668          lenA = 0;
   1669       } else {
   1670          PROF_EVENT(155, "set_address_range_perms-dist-sm1");
   1671          *sm_ptr = copy_for_writing(*sm_ptr);
   1672       }
   1673    }
   1674    sm = *sm_ptr;
   1675 
   1676    // 1 byte steps
   1677    while (True) {
   1678       if (VG_IS_8_ALIGNED(a)) break;
   1679       if (lenA < 1)           break;
   1680       PROF_EVENT(156, "set_address_range_perms-loop1a");
   1681       sm_off = SM_OFF(a);
   1682       insert_vabits2_into_vabits8( a, vabits2, &(sm->vabits8[sm_off]) );
   1683       a    += 1;
   1684       lenA -= 1;
   1685    }
   1686    // 8-aligned, 8 byte steps
   1687    while (True) {
   1688       if (lenA < 8) break;
   1689       PROF_EVENT(157, "set_address_range_perms-loop8a");
   1690       sm_off16 = SM_OFF_16(a);
   1691       ((UShort*)(sm->vabits8))[sm_off16] = vabits16;
   1692       a    += 8;
   1693       lenA -= 8;
   1694    }
   1695    // 1 byte steps
   1696    while (True) {
   1697       if (lenA < 1) break;
   1698       PROF_EVENT(158, "set_address_range_perms-loop1b");
   1699       sm_off = SM_OFF(a);
   1700       insert_vabits2_into_vabits8( a, vabits2, &(sm->vabits8[sm_off]) );
   1701       a    += 1;
   1702       lenA -= 1;
   1703    }
   1704 
   1705    // We've finished the first sec-map.  Is that it?
   1706    if (lenB == 0)
   1707       return;
   1708 
   1709    //------------------------------------------------------------------------
   1710    // Part 2: Fast-set entire sec-maps at a time.
   1711    //------------------------------------------------------------------------
   1712   part2:
   1713    // 64KB-aligned, 64KB steps.
   1714    // Nb: we can reach here with lenB < SM_SIZE
   1715    tl_assert(0 == lenA);
   1716    while (True) {
   1717       if (lenB < SM_SIZE) break;
   1718       tl_assert(is_start_of_sm(a));
   1719       PROF_EVENT(159, "set_address_range_perms-loop64K");
   1720       sm_ptr = get_secmap_ptr(a);
   1721       if (!is_distinguished_sm(*sm_ptr)) {
   1722          PROF_EVENT(160, "set_address_range_perms-loop64K-free-dist-sm");
   1723          // Free the non-distinguished sec-map that we're replacing.  This
   1724          // case happens moderately often, enough to be worthwhile.
   1725          SysRes sres = VG_(am_munmap_valgrind)((Addr)*sm_ptr, sizeof(SecMap));
   1726          tl_assert2(! sr_isError(sres), "SecMap valgrind munmap failure\n");
   1727       }
   1728       update_SM_counts(*sm_ptr, example_dsm);
   1729       // Make the sec-map entry point to the example DSM
   1730       *sm_ptr = example_dsm;
   1731       lenB -= SM_SIZE;
   1732       a    += SM_SIZE;
   1733    }
   1734 
   1735    // We've finished the whole sec-maps.  Is that it?
   1736    if (lenB == 0)
   1737       return;
   1738 
   1739    //------------------------------------------------------------------------
   1740    // Part 3: Finish off the final partial sec-map, if necessary.
   1741    //------------------------------------------------------------------------
   1742 
   1743    tl_assert(is_start_of_sm(a) && lenB < SM_SIZE);
   1744 
   1745    // If it's distinguished, make it undistinguished if necessary.
   1746    sm_ptr = get_secmap_ptr(a);
   1747    if (is_distinguished_sm(*sm_ptr)) {
   1748       if (*sm_ptr == example_dsm) {
   1749          // Sec-map already has the V+A bits that we want, so stop.
   1750          PROF_EVENT(161, "set_address_range_perms-dist-sm2-quick");
   1751          return;
   1752       } else {
   1753          PROF_EVENT(162, "set_address_range_perms-dist-sm2");
   1754          *sm_ptr = copy_for_writing(*sm_ptr);
   1755       }
   1756    }
   1757    sm = *sm_ptr;
   1758 
   1759    // 8-aligned, 8 byte steps
   1760    while (True) {
   1761       if (lenB < 8) break;
   1762       PROF_EVENT(163, "set_address_range_perms-loop8b");
   1763       sm_off16 = SM_OFF_16(a);
   1764       ((UShort*)(sm->vabits8))[sm_off16] = vabits16;
   1765       a    += 8;
   1766       lenB -= 8;
   1767    }
   1768    // 1 byte steps
   1769    while (True) {
   1770       if (lenB < 1) return;
   1771       PROF_EVENT(164, "set_address_range_perms-loop1c");
   1772       sm_off = SM_OFF(a);
   1773       insert_vabits2_into_vabits8( a, vabits2, &(sm->vabits8[sm_off]) );
   1774       a    += 1;
   1775       lenB -= 1;
   1776    }
   1777 }
   1778 
   1779 
   1780 /* --- Set permissions for arbitrary address ranges --- */
   1781 
   1782 void MC_(make_mem_noaccess) ( Addr a, SizeT len )
   1783 {
   1784    PROF_EVENT(40, "MC_(make_mem_noaccess)");
   1785    DEBUG("MC_(make_mem_noaccess)(%p, %lu)\n", a, len);
   1786    set_address_range_perms ( a, len, VA_BITS16_NOACCESS, SM_DIST_NOACCESS );
   1787    if (UNLIKELY( MC_(clo_mc_level) == 3 ))
   1788       ocache_sarp_Clear_Origins ( a, len );
   1789 }
   1790 
   1791 static void make_mem_undefined ( Addr a, SizeT len )
   1792 {
   1793    PROF_EVENT(41, "make_mem_undefined");
   1794    DEBUG("make_mem_undefined(%p, %lu)\n", a, len);
   1795    set_address_range_perms ( a, len, VA_BITS16_UNDEFINED, SM_DIST_UNDEFINED );
   1796 }
   1797 
   1798 void MC_(make_mem_undefined_w_otag) ( Addr a, SizeT len, UInt otag )
   1799 {
   1800    PROF_EVENT(43, "MC_(make_mem_undefined)");
   1801    DEBUG("MC_(make_mem_undefined)(%p, %lu)\n", a, len);
   1802    set_address_range_perms ( a, len, VA_BITS16_UNDEFINED, SM_DIST_UNDEFINED );
   1803    if (UNLIKELY( MC_(clo_mc_level) == 3 ))
   1804       ocache_sarp_Set_Origins ( a, len, otag );
   1805 }
   1806 
   1807 static
   1808 void make_mem_undefined_w_tid_and_okind ( Addr a, SizeT len,
   1809                                           ThreadId tid, UInt okind )
   1810 {
   1811    UInt        ecu;
   1812    ExeContext* here;
   1813    /* VG_(record_ExeContext) checks for validity of tid, and asserts
   1814       if it is invalid.  So no need to do it here. */
   1815    tl_assert(okind <= 3);
   1816    here = VG_(record_ExeContext)( tid, 0/*first_ip_delta*/ );
   1817    tl_assert(here);
   1818    ecu = VG_(get_ECU_from_ExeContext)(here);
   1819    tl_assert(VG_(is_plausible_ECU)(ecu));
   1820    MC_(make_mem_undefined_w_otag) ( a, len, ecu | okind );
   1821 }
   1822 
   1823 static
   1824 void mc_new_mem_w_tid_make_ECU  ( Addr a, SizeT len, ThreadId tid )
   1825 {
   1826    make_mem_undefined_w_tid_and_okind ( a, len, tid, MC_OKIND_UNKNOWN );
   1827 }
   1828 
   1829 static
   1830 void mc_new_mem_w_tid_no_ECU  ( Addr a, SizeT len, ThreadId tid )
   1831 {
   1832    MC_(make_mem_undefined_w_otag) ( a, len, MC_OKIND_UNKNOWN );
   1833 }
   1834 
   1835 void MC_(make_mem_defined) ( Addr a, SizeT len )
   1836 {
   1837    PROF_EVENT(42, "MC_(make_mem_defined)");
   1838    DEBUG("MC_(make_mem_defined)(%p, %lu)\n", a, len);
   1839    set_address_range_perms ( a, len, VA_BITS16_DEFINED, SM_DIST_DEFINED );
   1840    if (UNLIKELY( MC_(clo_mc_level) == 3 ))
   1841       ocache_sarp_Clear_Origins ( a, len );
   1842 }
   1843 
   1844 /* For each byte in [a,a+len), if the byte is addressable, make it be
   1845    defined, but if it isn't addressable, leave it alone.  In other
   1846    words, a version of MC_(make_mem_defined) that doesn't mess with
   1847    addressability.  Low-performance implementation. */
   1848 static void make_mem_defined_if_addressable ( Addr a, SizeT len )
   1849 {
   1850    SizeT i;
   1851    UChar vabits2;
   1852    DEBUG("make_mem_defined_if_addressable(%p, %llu)\n", a, (ULong)len);
   1853    for (i = 0; i < len; i++) {
   1854       vabits2 = get_vabits2( a+i );
   1855       if (LIKELY(VA_BITS2_NOACCESS != vabits2)) {
   1856          set_vabits2(a+i, VA_BITS2_DEFINED);
   1857          if (UNLIKELY(MC_(clo_mc_level) >= 3)) {
   1858             MC_(helperc_b_store1)( a+i, 0 ); /* clear the origin tag */
   1859          }
   1860       }
   1861    }
   1862 }
   1863 
   1864 /* Similarly (needed for mprotect handling ..) */
   1865 static void make_mem_defined_if_noaccess ( Addr a, SizeT len )
   1866 {
   1867    SizeT i;
   1868    UChar vabits2;
   1869    DEBUG("make_mem_defined_if_noaccess(%p, %llu)\n", a, (ULong)len);
   1870    for (i = 0; i < len; i++) {
   1871       vabits2 = get_vabits2( a+i );
   1872       if (LIKELY(VA_BITS2_NOACCESS == vabits2)) {
   1873          set_vabits2(a+i, VA_BITS2_DEFINED);
   1874          if (UNLIKELY(MC_(clo_mc_level) >= 3)) {
   1875             MC_(helperc_b_store1)( a+i, 0 ); /* clear the origin tag */
   1876          }
   1877       }
   1878    }
   1879 }
   1880 
   1881 /* --- Block-copy permissions (needed for implementing realloc() and
   1882        sys_mremap). --- */
   1883 
   1884 void MC_(copy_address_range_state) ( Addr src, Addr dst, SizeT len )
   1885 {
   1886    SizeT i, j;
   1887    UChar vabits2, vabits8;
   1888    Bool  aligned, nooverlap;
   1889 
   1890    DEBUG("MC_(copy_address_range_state)\n");
   1891    PROF_EVENT(50, "MC_(copy_address_range_state)");
   1892 
   1893    if (len == 0 || src == dst)
   1894       return;
   1895 
   1896    aligned   = VG_IS_4_ALIGNED(src) && VG_IS_4_ALIGNED(dst);
   1897    nooverlap = src+len <= dst || dst+len <= src;
   1898 
   1899    if (nooverlap && aligned) {
   1900 
   1901       /* Vectorised fast case, when no overlap and suitably aligned */
   1902       /* vector loop */
   1903       i = 0;
   1904       while (len >= 4) {
   1905          vabits8 = get_vabits8_for_aligned_word32( src+i );
   1906          set_vabits8_for_aligned_word32( dst+i, vabits8 );
   1907          if (LIKELY(VA_BITS8_DEFINED == vabits8
   1908                             || VA_BITS8_UNDEFINED == vabits8
   1909                             || VA_BITS8_NOACCESS == vabits8)) {
   1910             /* do nothing */
   1911          } else {
   1912             /* have to copy secondary map info */
   1913             if (VA_BITS2_PARTDEFINED == get_vabits2( src+i+0 ))
   1914                set_sec_vbits8( dst+i+0, get_sec_vbits8( src+i+0 ) );
   1915             if (VA_BITS2_PARTDEFINED == get_vabits2( src+i+1 ))
   1916                set_sec_vbits8( dst+i+1, get_sec_vbits8( src+i+1 ) );
   1917             if (VA_BITS2_PARTDEFINED == get_vabits2( src+i+2 ))
   1918                set_sec_vbits8( dst+i+2, get_sec_vbits8( src+i+2 ) );
   1919             if (VA_BITS2_PARTDEFINED == get_vabits2( src+i+3 ))
   1920                set_sec_vbits8( dst+i+3, get_sec_vbits8( src+i+3 ) );
   1921          }
   1922          i += 4;
   1923          len -= 4;
   1924       }
   1925       /* fixup loop */
   1926       while (len >= 1) {
   1927          vabits2 = get_vabits2( src+i );
   1928          set_vabits2( dst+i, vabits2 );
   1929          if (VA_BITS2_PARTDEFINED == vabits2) {
   1930             set_sec_vbits8( dst+i, get_sec_vbits8( src+i ) );
   1931          }
   1932          i++;
   1933          len--;
   1934       }
   1935 
   1936    } else {
   1937 
   1938       /* We have to do things the slow way */
   1939       if (src < dst) {
   1940          for (i = 0, j = len-1; i < len; i++, j--) {
   1941             PROF_EVENT(51, "MC_(copy_address_range_state)(loop)");
   1942             vabits2 = get_vabits2( src+j );
   1943             set_vabits2( dst+j, vabits2 );
   1944             if (VA_BITS2_PARTDEFINED == vabits2) {
   1945                set_sec_vbits8( dst+j, get_sec_vbits8( src+j ) );
   1946             }
   1947          }
   1948       }
   1949 
   1950       if (src > dst) {
   1951          for (i = 0; i < len; i++) {
   1952             PROF_EVENT(52, "MC_(copy_address_range_state)(loop)");
   1953             vabits2 = get_vabits2( src+i );
   1954             set_vabits2( dst+i, vabits2 );
   1955             if (VA_BITS2_PARTDEFINED == vabits2) {
   1956                set_sec_vbits8( dst+i, get_sec_vbits8( src+i ) );
   1957             }
   1958          }
   1959       }
   1960    }
   1961 
   1962 }
   1963 
   1964 
   1965 /*------------------------------------------------------------*/
   1966 /*--- Origin tracking stuff - cache basics                 ---*/
   1967 /*------------------------------------------------------------*/
   1968 
   1969 /* AN OVERVIEW OF THE ORIGIN TRACKING IMPLEMENTATION
   1970    ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
   1971 
   1972    Note that this implementation draws inspiration from the "origin
   1973    tracking by value piggybacking" scheme described in "Tracking Bad
   1974    Apples: Reporting the Origin of Null and Undefined Value Errors"
   1975    (Michael Bond, Nicholas Nethercote, Stephen Kent, Samuel Guyer,
   1976    Kathryn McKinley, OOPSLA07, Montreal, Oct 2007) but in fact it is
   1977    implemented completely differently.
   1978 
   1979    Origin tags and ECUs -- about the shadow values
   1980    ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
   1981 
   1982    This implementation tracks the defining point of all uninitialised
   1983    values using so called "origin tags", which are 32-bit integers,
   1984    rather than using the values themselves to encode the origins.  The
   1985    latter, so-called value piggybacking", is what the OOPSLA07 paper
   1986    describes.
   1987 
   1988    Origin tags, as tracked by the machinery below, are 32-bit unsigned
   1989    ints (UInts), regardless of the machine's word size.  Each tag
   1990    comprises an upper 30-bit ECU field and a lower 2-bit
   1991    'kind' field.  The ECU field is a number given out by m_execontext
   1992    and has a 1-1 mapping with ExeContext*s.  An ECU can be used
   1993    directly as an origin tag (otag), but in fact we want to put
   1994    additional information in the 'kind' field to indicate roughly where
   1995    the tag came from.  This helps print more understandable error messages
   1996    for the user -- it has no other purpose.  In summary:
   1997 
   1998    * Both ECUs and origin tags are represented as 32-bit words
   1999 
   2000    * m_execontext and the core-tool interface deal purely in ECUs.
   2001      They have no knowledge of origin tags - that is a purely
   2002      Memcheck-internal matter.
   2003 
   2004    * all valid ECUs have the lowest 2 bits zero and at least
   2005      one of the upper 30 bits nonzero (see VG_(is_plausible_ECU))
   2006 
   2007    * to convert from an ECU to an otag, OR in one of the MC_OKIND_
   2008      constants defined in mc_include.h.
   2009 
   2010    * to convert an otag back to an ECU, AND it with ~3
   2011 
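           As a quick illustration of the two conversions just listed (a
           sketch, not code from this file; MC_OKIND_UNKNOWN is one of the
           kind constants):

              UInt otag = ecu | MC_OKIND_UNKNOWN;  /* ECU -> otag */
              UInt ecu2 = otag & ~3;               /* otag -> ECU; ecu2 == ecu */
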
   2012    One important fact is that no valid otag is zero.  A zero otag is
   2013    used by the implementation to indicate "no origin", which could
   2014    mean that either the value is defined, or it is undefined but the
   2015    implementation somehow managed to lose the origin.
   2016 
   2017    The ECU used for memory created by malloc etc is derived from the
   2018    stack trace at the time the malloc etc happens.  This means the
   2019    mechanism can show the exact allocation point for heap-created
   2020    uninitialised values.
   2021 
   2022    In contrast, it is simply too expensive to create a complete
   2023    backtrace for each stack allocation.  Therefore we merely use a
   2024    depth-1 backtrace for stack allocations, which can be done once at
   2025    translation time, rather than N times at run time.  The result of
   2026    this is that, for stack created uninitialised values, Memcheck can
   2027    only show the allocating function, and not what called it.
   2028    Furthermore, compilers tend to move the stack pointer just once at
   2029    the start of the function, to allocate all locals, and so in fact
   2030    the stack origin almost always simply points to the opening brace
   2031    of the function.  Net result is, for stack origins, the mechanism
   2032    can tell you in which function the undefined value was created, but
   2033    that's all.  Users will need to carefully check all locals in the
   2034    specified function.
   2035 
   2036    Shadowing registers and memory
   2037    ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
   2038 
   2039    Memory is shadowed using a two level cache structure (ocacheL1 and
   2040    ocacheL2).  Memory references are first directed to ocacheL1.  This
   2041    is a traditional 2-way set associative cache with 32-byte lines and
   2042    approximate LRU replacement within each set.
   2043 
   2044    A naive implementation would require storing one 32 bit otag for
   2045    each byte of memory covered, a 4:1 space overhead.  Instead, there
   2046    is one otag for every 4 bytes of memory covered, plus a 4-bit mask
   2047    that shows which of the 4 bytes have that shadow value and which
   2048    have a shadow value of zero (indicating no origin).  Hence a lot of
   2049    space is saved, but the cost is that only one different origin per
   2050    4 bytes of address space can be represented.  This is a source of
   2051    imprecision, but how much of a problem it really is remains to be
   2052    seen.
   2053 
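           To make this concrete, a sketch of recording origin 'otag' for a
           single byte at address 'a' (using the OCacheLine fields and the
           find_OCacheLine/oc_line_offset helpers defined further down, and
           ignoring the zero-otag case) is roughly:

              OCacheLine* line    = find_OCacheLine( a );
              UWord       lineoff = oc_line_offset( a );  /* which 32-bit word */
              UWord       byteoff = a & 3;                /* which byte in it */
              line->w32[lineoff]   = otag;
              line->descr[lineoff] |= (1 << byteoff);
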
   2054    A cache line that contains all zeroes ("no origins") contains no
   2055    useful information, and can be ejected from the L1 cache "for
   2056    free", in the sense that a read miss on the L1 causes a line of
   2057    zeroes to be installed.  However, ejecting a line containing
   2058    nonzeroes risks losing origin information permanently.  In order to
   2059    prevent such lossage, ejected nonzero lines are placed in a
   2060    secondary cache (ocacheL2), which is an OSet (AVL tree) of cache
   2061    lines.  This can grow arbitrarily large, and so should ensure that
   2062    Memcheck runs out of memory in preference to losing useful origin
   2063    info due to cache size limitations.
   2064 
   2065    Shadowing registers is a bit tricky, because the shadow values are
   2066    32 bits, regardless of the size of the register.  That gives a
   2067    problem for registers smaller than 32 bits.  The solution is to
   2068    find spaces in the guest state that are unused, and use those to
   2069    shadow guest state fragments smaller than 32 bits.  For example, on
   2070    ppc32/64, each vector register is 16 bytes long.  If 4 bytes of the
   2071    shadow are allocated for the register's otag, then there are still
   2072    12 bytes left over which could be used to shadow 3 other values.
   2073 
   2074    This implies there is some non-obvious mapping from guest state
   2075    (start,length) pairs to the relevant shadow offset (for the origin
   2076    tags).  And it is unfortunately guest-architecture specific.  The
   2077    mapping is contained in mc_machine.c, which is quite lengthy but
   2078    straightforward.
   2079 
   2080    Instrumenting the IR
   2081    ~~~~~~~~~~~~~~~~~~~~
   2082 
   2083    Instrumentation is largely straightforward, and done by the
   2084    functions schemeE and schemeS in mc_translate.c.  These generate
   2085    code for handling the origin tags of expressions (E) and statements
   2086    (S) respectively.  The rather strange names are a reference to the
   2087    "compilation schemes" shown in Simon Peyton Jones' book "The
   2088    Implementation of Functional Programming Languages" (Prentice Hall,
   2089    1987, see
   2090    http://research.microsoft.com/~simonpj/papers/slpj-book-1987/index.htm).
   2091 
   2092    schemeS merely arranges to move shadow values around the guest
   2093    state to track the incoming IR.  schemeE is largely trivial too.
   2094    The only significant point is how to compute the otag corresponding
   2095    to binary (or ternary, quaternary, etc) operator applications.  The
   2096    rule is simple: just take whichever value is larger (32-bit
   2097    unsigned max).  Constants get the special value zero.  Hence this
   2098    rule always propagates a nonzero (known) otag in preference to a
   2099    zero (unknown, or more likely, value-is-defined) tag, as we want.
   2100    If two different undefined values are inputs to a binary operator
   2101    application, then which is propagated is arbitrary, but that
   2102    doesn't matter, since the program is erroneous in using either of
   2103    the values, and so there's no point in attempting to propagate
   2104    both.
   2105 
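           Expressed as a formula (a sketch of the same rule), for a binary
           application z = op(x,y) the instrumentation computes

              otag(z) = Max32U( otag(x), otag(y) )

           with constant operands contributing zero, so a known (nonzero)
           origin always wins over "no origin".
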
   2106    Since constants are abstracted to (otag) zero, much of the
   2107    instrumentation code can be folded out without difficulty by the
   2108    generic post-instrumentation IR cleanup pass, using these rules:
   2109    Max32U(0,x) -> x, Max32U(x,0) -> x, and Max32U(x,y) where x and y
   2110    are constants is evaluated at JIT time, followed by removal of the
   2111    resulting dead code.  In practice this causes surprisingly few
   2112    Max32Us to survive through to backend code generation.
   2113 
   2114    Integration with the V-bits machinery
   2115    ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
   2116 
   2117    This is again largely straightforward.  Mostly the otag and V bits
   2118    stuff are independent.  The only point of interaction is when the V
   2119    bits instrumenter creates a call to a helper function to report an
   2120    uninitialised value error -- in that case it must first use schemeE
   2121    to get hold of the origin tag expression for the value, and pass
   2122    that to the helper too.
   2123 
   2124    There is the usual stuff to do with setting address range
   2125    permissions.  When memory is painted undefined, we must also know
   2126    the origin tag to paint with, which involves some tedious plumbing,
   2127    particularly to do with the fast case stack handlers.  When memory
   2128    is painted defined or noaccess then the origin tags must be forced
   2129    to zero.
   2130 
   2131    One of the goals of the implementation was to ensure that the
   2132    non-origin tracking mode isn't slowed down at all.  To do this,
   2133    various functions to do with memory permissions setting (again,
   2134    mostly pertaining to the stack) are duplicated for the with- and
   2135    without-otag case.
   2136 
   2137    Dealing with stack redzones, and the NIA cache
   2138    ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
   2139 
   2140    This is one of the few non-obvious parts of the implementation.
   2141 
   2142    Some ABIs (amd64-ELF, ppc64-ELF, ppc32/64-XCOFF) define a small
   2143    reserved area below the stack pointer, that can be used as scratch
   2144    space by compiler generated code for functions.  In the Memcheck
   2145    sources this is referred to as the "stack redzone".  The important
   2146    thing here is that such redzones are considered volatile across
   2147    function calls and returns.  So Memcheck takes care to mark them as
   2148    undefined for each call and return, on the afflicted platforms.
   2149    Past experience shows this is essential in order to get reliable
   2150    messages about uninitialised values that come from the stack.
   2151 
   2152    So the question is, when we paint a redzone undefined, what origin
   2153    tag should we use for it?  Consider a function f() calling g().  If
   2154    we paint the redzone using an otag derived from the ExeContext of
   2155    the CALL/BL instruction in f, then any errors in g causing it to
   2156    use uninitialised values that happen to lie in the redzone, will be
   2157    reported as having their origin in f.  Which is highly confusing.
   2158 
   2159    The same applies for returns: if, on a return, we paint the redzone
   2160    using an origin tag derived from the ExeContext of the RET/BLR
   2161    instruction in g, then any later errors in f causing it to use
   2162    uninitialised values in the redzone, will be reported as having
   2163    their origin in g.  Which is just as confusing.
   2164 
   2165    To do it right, in both cases we need to use an origin tag which
   2166    pertains to the instruction which dynamically follows the CALL/BL
   2167    or RET/BLR.  In short, one derived from the NIA - the "next
   2168    instruction address".
   2169 
   2170    To make this work, Memcheck's redzone-painting helper,
   2171    MC_(helperc_MAKE_STACK_UNINIT), now takes a third argument, the
   2172    NIA.  It converts the NIA to a 1-element ExeContext, and uses that
   2173    ExeContext's ECU as the basis for the otag used to paint the
   2174    redzone.  The expensive part of this is converting an NIA into an
   2175    ECU, since this happens once for every call and every return.  So
   2176    we use a simple 511-line, 2-way set associative cache
   2177    (nia_to_ecu_cache) to cache the mappings, and that knocks most of
   2178    the cost out.
   2179 
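           A sketch of that lookup (the entry layout and names here are
           hypothetical; the real cache is defined further down this file):

              i = nia % 511;                                /* choose a set */
              if      (cache[i].nia0 == nia) ecu = cache[i].ecu0;  /* way 0 */
              else if (cache[i].nia1 == nia) ecu = cache[i].ecu1;  /* way 1 */
              else {
                 /* miss: make a depth-1 ExeContext for nia, take its ECU,
                    and install it, demoting the previous way-0 entry. */
              }
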
   2180    Further background comments
   2181    ~~~~~~~~~~~~~~~~~~~~~~~~~~~
   2182 
   2183    > Question: why is otag a UInt?  Wouldn't a UWord be better?  Isn't
   2184    > it really just the address of the relevant ExeContext?
   2185 
   2186    Well, it's not the address, but a value which has a 1-1 mapping
   2187    with ExeContexts, and is guaranteed not to be zero, since zero
   2188    denotes (to memcheck) "unknown origin or defined value".  So these
   2189    UInts are just numbers starting at 4 and incrementing by 4; each
   2190    ExeContext is given a number when it is created.  (*** NOTE this
   2191    confuses otags and ECUs; see comments above ***).
   2192 
   2193    Making these otags 32-bit regardless of the machine's word size
   2194    makes the 64-bit implementation easier (next para).  And it doesn't
   2195    really limit us in any way, since for the tags to overflow would
   2196    require that the program somehow caused 2^30-1 different
   2197    ExeContexts to be created, in which case it is probably in deep
   2198    trouble.  Not to mention V will have soaked up many tens of
   2199    gigabytes of memory merely to store them all.
   2200 
   2201    So having 64-bit origins doesn't really buy you anything, and has
   2202    the following downsides:
   2203 
   2204    Suppose that instead, an otag is a UWord.  This would mean that, on
   2205    a 64-bit target,
   2206 
   2207    1. It becomes hard to shadow any element of guest state which is
   2208       smaller than 8 bytes.  To do so means you'd need to find some
   2209       8-byte-sized hole in the guest state which you don't want to
   2210       shadow, and use that instead to hold the otag.  On ppc64, the
   2211       condition code register(s) are split into 20 UChar sized pieces,
   2212       all of which need to be tracked (guest_XER_SO .. guest_CR7_0)
   2213       and so that would entail finding 160 bytes somewhere else in the
   2214       guest state.
   2215 
   2216       Even on x86, I want to track origins for %AH .. %DH (bits 15:8
   2217       of %EAX .. %EDX) that are separate from %AL .. %DL (bits 7:0 of
   2218       same) and so I had to look for 4 untracked otag-sized areas in
   2219       the guest state to make that possible.
   2220 
   2221       The same problem exists of course when origin tags are only 32
   2222       bits, but it's less extreme.
   2223 
   2224    2. (More compelling) it doubles the size of the origin shadow
   2225       memory.  Given that the shadow memory is organised as a fixed
   2226       size cache, and that accuracy of tracking is limited by origins
   2227       falling out the cache due to space conflicts, this isn't good.
   2228 
   2229    > Another question: is the origin tracking perfect, or are there
   2230    > cases where it fails to determine an origin?
   2231 
   2232    It is imperfect for at least the following reasons, and
   2233    probably more:
   2234 
   2235    * Insufficient capacity in the origin cache.  When a line is
   2236      evicted from the cache it is gone forever, and so subsequent
   2237      queries for the line produce zero, indicating no origin
   2238      information.  Interestingly, a line containing all zeroes can be
   2239      evicted "free" from the cache, since it contains no useful
   2240      information, so there is scope perhaps for some cleverer cache
   2241      management schemes.  (*** NOTE, with the introduction of the
   2242      second level origin tag cache, ocacheL2, this is no longer a
   2243      problem. ***)
   2244 
   2245    * The origin cache only stores one otag per 32-bits of address
   2246      space, plus 4 bits indicating which of the 4 bytes has that tag
   2247      and which are considered defined.  The result is that if two
   2248      undefined bytes in the same word are stored in memory, the first
   2249      stored byte's origin will be lost and replaced by the origin for
   2250      the second byte.
   2251 
   2252    * Nonzero origin tags for defined values.  Consider a binary
   2253      operator application op(x,y).  Suppose y is undefined (and so has
   2254      a valid nonzero origin tag), and x is defined, but erroneously
   2255      has a nonzero origin tag (defined values should have tag zero).
   2256      If the erroneous tag has a numeric value greater than y's tag,
   2257      then the rule for propagating origin tags though binary
   2258      operations, which is simply to take the unsigned max of the two
   2259      tags, will erroneously propagate x's tag rather than y's.
   2260 
   2261    * Some obscure uses of x86/amd64 byte registers can cause lossage
   2262      or confusion of origins.  %AH .. %DH are treated as different
   2263      from, and unrelated to, their parent registers, %EAX .. %EDX.
   2264      So some weird sequences like
   2265 
   2266         movb undefined-value, %AH
   2267         movb defined-value, %AL
   2268         .. use %AX or %EAX ..
   2269 
   2270      will cause the origin attributed to %AH to be ignored, since %AL,
   2271      %AX, %EAX are treated as the same register, and %AH as a
   2272      completely separate one.
   2273 
   2274    But having said all that, it actually seems to work fairly well in
   2275    practice.
   2276 */
   2277 
   2278 static UWord stats_ocacheL1_find           = 0;
   2279 static UWord stats_ocacheL1_found_at_1     = 0;
   2280 static UWord stats_ocacheL1_found_at_N     = 0;
   2281 static UWord stats_ocacheL1_misses         = 0;
   2282 static UWord stats_ocacheL1_lossage        = 0;
   2283 static UWord stats_ocacheL1_movefwds       = 0;
   2284 
   2285 static UWord stats__ocacheL2_refs          = 0;
   2286 static UWord stats__ocacheL2_misses        = 0;
   2287 static UWord stats__ocacheL2_n_nodes_max   = 0;
   2288 
   2289 /* Cache of 32-bit values, one every 32 bits of address space */
   2290 
   2291 #define OC_BITS_PER_LINE 5
   2292 #define OC_W32S_PER_LINE (1 << (OC_BITS_PER_LINE - 2))
   2293 
   2294 static INLINE UWord oc_line_offset ( Addr a ) {
   2295    return (a >> 2) & (OC_W32S_PER_LINE - 1);
   2296 }
   2297 static INLINE Bool is_valid_oc_tag ( Addr tag ) {
   2298    return 0 == (tag & ((1 << OC_BITS_PER_LINE) - 1));
   2299 }
   2300 
   2301 #define OC_LINES_PER_SET 2
   2302 
   2303 #define OC_N_SET_BITS    20
   2304 #define OC_N_SETS        (1 << OC_N_SET_BITS)
   2305 
   2306 /* These settings give:
   2307    64 bit host: ocache:  100,663,296 sizeB    67,108,864 useful
   2308    32 bit host: ocache:   92,274,688 sizeB    67,108,864 useful
   2309 */
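        /* (A sketch of where those numbers come from, ignoring any struct
           padding: each OCacheLine below is one Addr tag (8 or 4 bytes), 8
           UInts of w32 (32 bytes) and 8 UChars of descr (8 bytes), ie. 48 or
           44 bytes; 2 lines/set * 2^20 sets gives 100,663,296 or 92,274,688
           bytes in total, of which the w32 payload, 32 * 2 * 2^20 =
           67,108,864 bytes, is the "useful" part.) */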
   2310 
   2311 #define OC_MOVE_FORWARDS_EVERY_BITS 7
   2312 
   2313 
   2314 typedef
   2315    struct {
   2316       Addr  tag;
   2317       UInt  w32[OC_W32S_PER_LINE];
   2318       UChar descr[OC_W32S_PER_LINE];
   2319    }
   2320    OCacheLine;
   2321 
   2322 /* Classify and also sanity-check 'line'.  Return 'e' (empty) if not
   2323    in use, 'n' (nonzero) if it contains at least one valid origin tag,
   2324    and 'z' if all the represented tags are zero. */
   2325 static UChar classify_OCacheLine ( OCacheLine* line )
   2326 {
   2327    UWord i;
   2328    if (line->tag == 1/*invalid*/)
   2329       return 'e'; /* EMPTY */
   2330    tl_assert(is_valid_oc_tag(line->tag));
   2331    for (i = 0; i < OC_W32S_PER_LINE; i++) {
   2332       tl_assert(0 == ((~0xF) & line->descr[i]));
   2333       if (line->w32[i] > 0 && line->descr[i] > 0)
   2334          return 'n'; /* NONZERO - contains useful info */
   2335    }
   2336    return 'z'; /* ZERO - no useful info */
   2337 }
   2338 
   2339 typedef
   2340    struct {
   2341       OCacheLine line[OC_LINES_PER_SET];
   2342    }
   2343    OCacheSet;
   2344 
   2345 typedef
   2346    struct {
   2347       OCacheSet set[OC_N_SETS];
   2348    }
   2349    OCache;
   2350 
   2351 static OCache* ocacheL1 = NULL;
   2352 static UWord   ocacheL1_event_ctr = 0;
   2353 
   2354 static void init_ocacheL2 ( void ); /* fwds */
   2355 static void init_OCache ( void )
   2356 {
   2357    UWord line, set;
   2358    tl_assert(MC_(clo_mc_level) >= 3);
   2359    tl_assert(ocacheL1 == NULL);
   2360    ocacheL1 = VG_(am_shadow_alloc)(sizeof(OCache));
   2361    if (ocacheL1 == NULL) {
   2362       VG_(out_of_memory_NORETURN)( "memcheck:allocating ocacheL1",
   2363                                    sizeof(OCache) );
   2364    }
   2365    tl_assert(ocacheL1 != NULL);
   2366    for (set = 0; set < OC_N_SETS; set++) {
   2367       for (line = 0; line < OC_LINES_PER_SET; line++) {
   2368          ocacheL1->set[set].line[line].tag = 1/*invalid*/;
   2369       }
   2370    }
   2371    init_ocacheL2();
   2372 }
   2373 
   2374 static void moveLineForwards ( OCacheSet* set, UWord lineno )
   2375 {
   2376    OCacheLine tmp;
   2377    stats_ocacheL1_movefwds++;
   2378    tl_assert(lineno > 0 && lineno < OC_LINES_PER_SET);
   2379    tmp = set->line[lineno-1];
   2380    set->line[lineno-1] = set->line[lineno];
   2381    set->line[lineno] = tmp;
   2382 }
   2383 
   2384 static void zeroise_OCacheLine ( OCacheLine* line, Addr tag ) {
   2385    UWord i;
   2386    for (i = 0; i < OC_W32S_PER_LINE; i++) {
   2387       line->w32[i] = 0; /* NO ORIGIN */
   2388       line->descr[i] = 0; /* REALLY REALLY NO ORIGIN! */
   2389    }
   2390    line->tag = tag;
   2391 }
   2392 
   2393 //////////////////////////////////////////////////////////////
   2394 //// OCache backing store
   2395 
   2396 static OSet* ocacheL2 = NULL;
   2397 
   2398 static void* ocacheL2_malloc ( const HChar* cc, SizeT szB ) {
   2399    return VG_(malloc)(cc, szB);
   2400 }
   2401 static void ocacheL2_free ( void* v ) {
   2402    VG_(free)( v );
   2403 }
   2404 
   2405 /* Stats: # nodes currently in tree */
   2406 static UWord stats__ocacheL2_n_nodes = 0;
   2407 
   2408 static void init_ocacheL2 ( void )
   2409 {
   2410    tl_assert(!ocacheL2);
   2411    tl_assert(sizeof(Word) == sizeof(Addr)); /* since OCacheLine.tag :: Addr */
   2412    tl_assert(0 == offsetof(OCacheLine,tag));
   2413    ocacheL2
   2414       = VG_(OSetGen_Create)( offsetof(OCacheLine,tag),
   2415                              NULL, /* fast cmp */
   2416                              ocacheL2_malloc, "mc.ioL2", ocacheL2_free);
   2417    tl_assert(ocacheL2);
   2418    stats__ocacheL2_n_nodes = 0;
   2419 }
   2420 
   2421 /* Find line with the given tag in the tree, or NULL if not found. */
   2422 static OCacheLine* ocacheL2_find_tag ( Addr tag )
   2423 {
   2424    OCacheLine* line;
   2425    tl_assert(is_valid_oc_tag(tag));
   2426    stats__ocacheL2_refs++;
   2427    line = VG_(OSetGen_Lookup)( ocacheL2, &tag );
   2428    return line;
   2429 }
   2430 
   2431 /* Delete the line with the given tag from the tree, if it is present, and
   2432    free up the associated memory. */
   2433 static void ocacheL2_del_tag ( Addr tag )
   2434 {
   2435    OCacheLine* line;
   2436    tl_assert(is_valid_oc_tag(tag));
   2437    stats__ocacheL2_refs++;
   2438    line = VG_(OSetGen_Remove)( ocacheL2, &tag );
   2439    if (line) {
   2440       VG_(OSetGen_FreeNode)(ocacheL2, line);
   2441       tl_assert(stats__ocacheL2_n_nodes > 0);
   2442       stats__ocacheL2_n_nodes--;
   2443    }
   2444 }
   2445 
   2446 /* Add a copy of the given line to the tree.  It must not already be
   2447    present. */
   2448 static void ocacheL2_add_line ( OCacheLine* line )
   2449 {
   2450    OCacheLine* copy;
   2451    tl_assert(is_valid_oc_tag(line->tag));
   2452    copy = VG_(OSetGen_AllocNode)( ocacheL2, sizeof(OCacheLine) );
   2453    tl_assert(copy);
   2454    *copy = *line;
   2455    stats__ocacheL2_refs++;
   2456    VG_(OSetGen_Insert)( ocacheL2, copy );
   2457    stats__ocacheL2_n_nodes++;
   2458    if (stats__ocacheL2_n_nodes > stats__ocacheL2_n_nodes_max)
   2459       stats__ocacheL2_n_nodes_max = stats__ocacheL2_n_nodes;
   2460 }
   2461 
   2462 ////
   2463 //////////////////////////////////////////////////////////////
   2464 
   2465 __attribute__((noinline))
   2466 static OCacheLine* find_OCacheLine_SLOW ( Addr a )
   2467 {
   2468    OCacheLine *victim, *inL2;
   2469    UChar c;
   2470    UWord line;
   2471    UWord setno   = (a >> OC_BITS_PER_LINE) & (OC_N_SETS - 1);
   2472    UWord tagmask = ~((1 << OC_BITS_PER_LINE) - 1);
   2473    UWord tag     = a & tagmask;
   2474    tl_assert(setno >= 0 && setno < OC_N_SETS);
   2475 
   2476    /* we already tried line == 0; skip therefore. */
   2477    for (line = 1; line < OC_LINES_PER_SET; line++) {
   2478       if (ocacheL1->set[setno].line[line].tag == tag) {
   2479          if (line == 1) {
   2480             stats_ocacheL1_found_at_1++;
   2481          } else {
   2482             stats_ocacheL1_found_at_N++;
   2483          }
   2484          if (UNLIKELY(0 == (ocacheL1_event_ctr++
   2485                             & ((1<<OC_MOVE_FORWARDS_EVERY_BITS)-1)))) {
   2486             moveLineForwards( &ocacheL1->set[setno], line );
   2487             line--;
   2488          }
   2489          return &ocacheL1->set[setno].line[line];
   2490       }
   2491    }
   2492 
   2493    /* A miss.  Use the last slot.  Implicitly this means we're
   2494       ejecting the line in the last slot. */
   2495    stats_ocacheL1_misses++;
   2496    tl_assert(line == OC_LINES_PER_SET);
   2497    line--;
   2498    tl_assert(line > 0);
   2499 
   2500    /* First, move the to-be-ejected line to the L2 cache. */
   2501    victim = &ocacheL1->set[setno].line[line];
   2502    c = classify_OCacheLine(victim);
   2503    switch (c) {
   2504       case 'e':
   2505          /* the line is empty (has invalid tag); ignore it. */
   2506          break;
   2507       case 'z':
   2508          /* line contains zeroes.  We must ensure the backing store is
   2509             updated accordingly, either by copying the line there
   2510             verbatim, or by ensuring it isn't present there.  We
   2511             chosse the latter on the basis that it reduces the size of
   2512             choose the latter on the basis that it reduces the size of
   2513          ocacheL2_del_tag( victim->tag );
   2514          break;
   2515       case 'n':
   2516          /* line contains at least one real, useful origin.  Copy it
   2517             to the backing store. */
   2518          stats_ocacheL1_lossage++;
   2519          inL2 = ocacheL2_find_tag( victim->tag );
   2520          if (inL2) {
   2521             *inL2 = *victim;
   2522          } else {
   2523             ocacheL2_add_line( victim );
   2524          }
   2525          break;
   2526       default:
   2527          tl_assert(0);
   2528    }
   2529 
   2530    /* Now we must reload the L1 cache from the backing tree, if
   2531       possible. */
   2532    tl_assert(tag != victim->tag); /* stay sane */
   2533    inL2 = ocacheL2_find_tag( tag );
   2534    if (inL2) {
   2535       /* We're in luck.  It's in the L2. */
   2536       ocacheL1->set[setno].line[line] = *inL2;
   2537    } else {
   2538       /* Missed at both levels of the cache hierarchy.  We have to
   2539          declare it as full of zeroes (unknown origins). */
   2540       stats__ocacheL2_misses++;
   2541       zeroise_OCacheLine( &ocacheL1->set[setno].line[line], tag );
   2542    }
   2543 
   2544    /* Move it one forwards */
   2545    moveLineForwards( &ocacheL1->set[setno], line );
   2546    line--;
   2547 
   2548    return &ocacheL1->set[setno].line[line];
   2549 }
   2550 
   2551 static INLINE OCacheLine* find_OCacheLine ( Addr a )
   2552 {
   2553    UWord setno   = (a >> OC_BITS_PER_LINE) & (OC_N_SETS - 1);
   2554    UWord tagmask = ~((1 << OC_BITS_PER_LINE) - 1);
   2555    UWord tag     = a & tagmask;
   2556 
   2557    stats_ocacheL1_find++;
   2558 
   2559    if (OC_ENABLE_ASSERTIONS) {
   2560       tl_assert(setno >= 0 && setno < OC_N_SETS);
   2561       tl_assert(0 == (tag & (4 * OC_W32S_PER_LINE - 1)));
   2562    }
   2563 
   2564    if (LIKELY(ocacheL1->set[setno].line[0].tag == tag)) {
   2565       return &ocacheL1->set[setno].line[0];
   2566    }
   2567 
   2568    return find_OCacheLine_SLOW( a );
   2569 }
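        /* Worked example of the decomposition used above (illustrative only;
           it assumes OC_BITS_PER_LINE == 5, i.e. 32-byte lines -- see the
           real definitions earlier in this file):

              a     = 0x1234567C
              tag   = a & ~(UWord)0x1F           == 0x12345660
              setno = (a >> 5) & (OC_N_SETS - 1)

           so every address within the 32-byte span [tag, tag+31] probes the
           same set, and slot 0 of that set is the fast-path hit. */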
   2570 
   2571 static INLINE void set_aligned_word64_Origin_to_undef ( Addr a, UInt otag )
   2572 {
   2573    //// BEGIN inlined, specialised version of MC_(helperc_b_store8)
   2574    //// Set the origins for a+0 .. a+7
   2575    { OCacheLine* line;
   2576      UWord lineoff = oc_line_offset(a);
   2577      if (OC_ENABLE_ASSERTIONS) {
   2578         tl_assert(lineoff >= 0
   2579                   && lineoff < OC_W32S_PER_LINE -1/*'cos 8-aligned*/);
   2580      }
   2581      line = find_OCacheLine( a );
   2582      line->descr[lineoff+0] = 0xF;
   2583      line->descr[lineoff+1] = 0xF;
   2584      line->w32[lineoff+0]   = otag;
   2585      line->w32[lineoff+1]   = otag;
   2586    }
   2587    //// END inlined, specialised version of MC_(helperc_b_store8)
   2588 }
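        /* Note on the descr updates above: each descr[] entry is a 4-bit
           mask, one bit per byte of the corresponding 32-bit word, saying
           which bytes have a valid origin recorded in w32[].  Writing 0xF
           marks all four bytes as carrying the origin tag; writing 0 (as
           the noaccess helpers below do) discards any recorded origin. */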
   2589 
   2590 
   2591 /*------------------------------------------------------------*/
   2592 /*--- Aligned fast case permission setters,                ---*/
   2593 /*--- for dealing with stacks                              ---*/
   2594 /*------------------------------------------------------------*/
   2595 
   2596 /*--------------------- 32-bit ---------------------*/
   2597 
   2598 /* Nb: by "aligned" here we mean 4-byte aligned */
   2599 
   2600 static INLINE void make_aligned_word32_undefined ( Addr a )
   2601 {
   2602    PROF_EVENT(300, "make_aligned_word32_undefined");
   2603 
   2604 #ifndef PERF_FAST_STACK2
   2605    make_mem_undefined(a, 4);
   2606 #else
   2607    {
   2608       UWord   sm_off;
   2609       SecMap* sm;
   2610 
   2611       if (UNLIKELY(a > MAX_PRIMARY_ADDRESS)) {
   2612          PROF_EVENT(301, "make_aligned_word32_undefined-slow1");
   2613          make_mem_undefined(a, 4);
   2614          return;
   2615       }
   2616 
   2617       sm                  = get_secmap_for_writing_low(a);
   2618       sm_off              = SM_OFF(a);
   2619       sm->vabits8[sm_off] = VA_BITS8_UNDEFINED;
   2620    }
   2621 #endif
   2622 }
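        /* Sketch of the fast path above (explanatory, not part of the
           original comments): for a 4-aligned address that lies inside the
           primary map, the A/V state of all four bytes is encoded in a
           single vabits8 entry, so one byte store is enough:

              sm = get_secmap_for_writing_low(a); // copies a distinguished
                                                  // secondary on first write
              sm->vabits8[SM_OFF(a)] = VA_BITS8_UNDEFINED;

           Addresses above MAX_PRIMARY_ADDRESS take the generic (slow)
           make_mem_undefined path instead. */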
   2623 
   2624 static INLINE
   2625 void make_aligned_word32_undefined_w_otag ( Addr a, UInt otag )
   2626 {
   2627    make_aligned_word32_undefined(a);
   2628    //// BEGIN inlined, specialised version of MC_(helperc_b_store4)
   2629    //// Set the origins for a+0 .. a+3
   2630    { OCacheLine* line;
   2631      UWord lineoff = oc_line_offset(a);
   2632      if (OC_ENABLE_ASSERTIONS) {
   2633         tl_assert(lineoff >= 0 && lineoff < OC_W32S_PER_LINE);
   2634      }
   2635      line = find_OCacheLine( a );
   2636      line->descr[lineoff] = 0xF;
   2637      line->w32[lineoff]   = otag;
   2638    }
   2639    //// END inlined, specialised version of MC_(helperc_b_store4)
   2640 }
   2641 
   2642 static INLINE
   2643 void make_aligned_word32_noaccess ( Addr a )
   2644 {
   2645    PROF_EVENT(310, "make_aligned_word32_noaccess");
   2646 
   2647 #ifndef PERF_FAST_STACK2
   2648    MC_(make_mem_noaccess)(a, 4);
   2649 #else
   2650    {
   2651       UWord   sm_off;
   2652       SecMap* sm;
   2653 
   2654       if (UNLIKELY(a > MAX_PRIMARY_ADDRESS)) {
   2655          PROF_EVENT(311, "make_aligned_word32_noaccess-slow1");
   2656          MC_(make_mem_noaccess)(a, 4);
   2657          return;
   2658       }
   2659 
   2660       sm                  = get_secmap_for_writing_low(a);
   2661       sm_off              = SM_OFF(a);
   2662       sm->vabits8[sm_off] = VA_BITS8_NOACCESS;
   2663 
   2664       //// BEGIN inlined, specialised version of MC_(helperc_b_store4)
   2665       //// Set the origins for a+0 .. a+3.
   2666       if (UNLIKELY( MC_(clo_mc_level) == 3 )) {
   2667          OCacheLine* line;
   2668          UWord lineoff = oc_line_offset(a);
   2669          if (OC_ENABLE_ASSERTIONS) {
   2670             tl_assert(lineoff >= 0 && lineoff < OC_W32S_PER_LINE);
   2671          }
   2672          line = find_OCacheLine( a );
   2673          line->descr[lineoff] = 0;
   2674       }
   2675       //// END inlined, specialised version of MC_(helperc_b_store4)
   2676    }
   2677 #endif
   2678 }
   2679 
   2680 /*--------------------- 64-bit ---------------------*/
   2681 
   2682 /* Nb: by "aligned" here we mean 8-byte aligned */
   2683 
   2684 static INLINE void make_aligned_word64_undefined ( Addr a )
   2685 {
   2686    PROF_EVENT(320, "make_aligned_word64_undefined");
   2687 
   2688 #ifndef PERF_FAST_STACK2
   2689    make_mem_undefined(a, 8);
   2690 #else
   2691    {
   2692       UWord   sm_off16;
   2693       SecMap* sm;
   2694 
   2695       if (UNLIKELY(a > MAX_PRIMARY_ADDRESS)) {
   2696          PROF_EVENT(321, "make_aligned_word64_undefined-slow1");
   2697          make_mem_undefined(a, 8);
   2698          return;
   2699       }
   2700 
   2701       sm       = get_secmap_for_writing_low(a);
   2702       sm_off16 = SM_OFF_16(a);
   2703       ((UShort*)(sm->vabits8))[sm_off16] = VA_BITS16_UNDEFINED;
   2704    }
   2705 #endif
   2706 }
   2707 
   2708 static INLINE
   2709 void make_aligned_word64_undefined_w_otag ( Addr a, UInt otag )
   2710 {
   2711    make_aligned_word64_undefined(a);
   2712    //// BEGIN inlined, specialised version of MC_(helperc_b_store8)
   2713    //// Set the origins for a+0 .. a+7
   2714    { OCacheLine* line;
   2715      UWord lineoff = oc_line_offset(a);
   2716      tl_assert(lineoff >= 0
   2717                && lineoff < OC_W32S_PER_LINE -1/*'cos 8-aligned*/);
   2718      line = find_OCacheLine( a );
   2719      line->descr[lineoff+0] = 0xF;
   2720      line->descr[lineoff+1] = 0xF;
   2721      line->w32[lineoff+0]   = otag;
   2722      line->w32[lineoff+1]   = otag;
   2723    }
   2724    //// END inlined, specialised version of MC_(helperc_b_store8)
   2725 }
   2726 
   2727 static INLINE
   2728 void make_aligned_word64_noaccess ( Addr a )
   2729 {
   2730    PROF_EVENT(330, "make_aligned_word64_noaccess");
   2731 
   2732 #ifndef PERF_FAST_STACK2
   2733    MC_(make_mem_noaccess)(a, 8);
   2734 #else
   2735    {
   2736       UWord   sm_off16;
   2737       SecMap* sm;
   2738 
   2739       if (UNLIKELY(a > MAX_PRIMARY_ADDRESS)) {
   2740          PROF_EVENT(331, "make_aligned_word64_noaccess-slow1");
   2741          MC_(make_mem_noaccess)(a, 8);
   2742          return;
   2743       }
   2744 
   2745       sm       = get_secmap_for_writing_low(a);
   2746       sm_off16 = SM_OFF_16(a);
   2747       ((UShort*)(sm->vabits8))[sm_off16] = VA_BITS16_NOACCESS;
   2748 
   2749       //// BEGIN inlined, specialised version of MC_(helperc_b_store8)
   2750       //// Clear the origins for a+0 .. a+7.
   2751       if (UNLIKELY( MC_(clo_mc_level) == 3 )) {
   2752          OCacheLine* line;
   2753          UWord lineoff = oc_line_offset(a);
   2754          tl_assert(lineoff >= 0
   2755                    && lineoff < OC_W32S_PER_LINE -1/*'cos 8-aligned*/);
   2756          line = find_OCacheLine( a );
   2757          line->descr[lineoff+0] = 0;
   2758          line->descr[lineoff+1] = 0;
   2759       }
   2760       //// END inlined, specialised version of MC_(helperc_b_store8)
   2761    }
   2762 #endif
   2763 }
   2764 
   2765 
   2766 /*------------------------------------------------------------*/
   2767 /*--- Stack pointer adjustment                             ---*/
   2768 /*------------------------------------------------------------*/
   2769 
   2770 #ifdef PERF_FAST_STACK
   2771 #  define MAYBE_USED
   2772 #else
   2773 #  define MAYBE_USED __attribute__((unused))
   2774 #endif
   2775 
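        /* Convention for the specialised handlers that follow: each "new"
           handler marks the N freshly-exposed bytes [new_SP, new_SP+N) as
           undefined (optionally tagging them with a stack origin), while
           the matching "die" handler marks the N just-abandoned bytes
           [new_SP-N, new_SP) as noaccess.  Both first apply the
           -VG_STACK_REDZONE_SZB bias, and each picks the widest aligned
           stores that the alignment of new_SP allows. */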
   2776 /*--------------- adjustment by 4 bytes ---------------*/
   2777 
   2778 MAYBE_USED
   2779 static void VG_REGPARM(2) mc_new_mem_stack_4_w_ECU(Addr new_SP, UInt ecu)
   2780 {
   2781    UInt otag = ecu | MC_OKIND_STACK;
   2782    PROF_EVENT(110, "new_mem_stack_4");
   2783    if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
   2784       make_aligned_word32_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP, otag );
   2785    } else {
   2786       MC_(make_mem_undefined_w_otag) ( -VG_STACK_REDZONE_SZB + new_SP, 4, otag );
   2787    }
   2788 }
   2789 
   2790 MAYBE_USED
   2791 static void VG_REGPARM(1) mc_new_mem_stack_4(Addr new_SP)
   2792 {
   2793    PROF_EVENT(110, "new_mem_stack_4");
   2794    if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
   2795       make_aligned_word32_undefined ( -VG_STACK_REDZONE_SZB + new_SP );
   2796    } else {
   2797       make_mem_undefined ( -VG_STACK_REDZONE_SZB + new_SP, 4 );
   2798    }
   2799 }
   2800 
   2801 MAYBE_USED
   2802 static void VG_REGPARM(1) mc_die_mem_stack_4(Addr new_SP)
   2803 {
   2804    PROF_EVENT(120, "die_mem_stack_4");
   2805    if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
   2806       make_aligned_word32_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-4 );
   2807    } else {
   2808       MC_(make_mem_noaccess) ( -VG_STACK_REDZONE_SZB + new_SP-4, 4 );
   2809    }
   2810 }
   2811 
   2812 /*--------------- adjustment by 8 bytes ---------------*/
   2813 
   2814 MAYBE_USED
   2815 static void VG_REGPARM(2) mc_new_mem_stack_8_w_ECU(Addr new_SP, UInt ecu)
   2816 {
   2817    UInt otag = ecu | MC_OKIND_STACK;
   2818    PROF_EVENT(111, "new_mem_stack_8");
   2819    if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
   2820       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP, otag );
   2821    } else if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
   2822       make_aligned_word32_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP  , otag );
   2823       make_aligned_word32_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+4, otag );
   2824    } else {
   2825       MC_(make_mem_undefined_w_otag) ( -VG_STACK_REDZONE_SZB + new_SP, 8, otag );
   2826    }
   2827 }
   2828 
   2829 MAYBE_USED
   2830 static void VG_REGPARM(1) mc_new_mem_stack_8(Addr new_SP)
   2831 {
   2832    PROF_EVENT(111, "new_mem_stack_8");
   2833    if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
   2834       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP );
   2835    } else if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
   2836       make_aligned_word32_undefined ( -VG_STACK_REDZONE_SZB + new_SP );
   2837       make_aligned_word32_undefined ( -VG_STACK_REDZONE_SZB + new_SP+4 );
   2838    } else {
   2839       make_mem_undefined ( -VG_STACK_REDZONE_SZB + new_SP, 8 );
   2840    }
   2841 }
   2842 
   2843 MAYBE_USED
   2844 static void VG_REGPARM(1) mc_die_mem_stack_8(Addr new_SP)
   2845 {
   2846    PROF_EVENT(121, "die_mem_stack_8");
   2847    if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
   2848       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-8 );
   2849    } else if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
   2850       make_aligned_word32_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-8 );
   2851       make_aligned_word32_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-4 );
   2852    } else {
   2853       MC_(make_mem_noaccess) ( -VG_STACK_REDZONE_SZB + new_SP-8, 8 );
   2854    }
   2855 }
   2856 
   2857 /*--------------- adjustment by 12 bytes ---------------*/
   2858 
   2859 MAYBE_USED
   2860 static void VG_REGPARM(2) mc_new_mem_stack_12_w_ECU(Addr new_SP, UInt ecu)
   2861 {
   2862    UInt otag = ecu | MC_OKIND_STACK;
   2863    PROF_EVENT(112, "new_mem_stack_12");
   2864    if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
   2865       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP  , otag );
   2866       make_aligned_word32_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+8, otag );
   2867    } else if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
   2868       /* from previous test we don't have 8-alignment at offset +0,
   2869          hence must have 8 alignment at offsets +4/-4.  Hence safe to
   2870          do 4 at +0 and then 8 at +4. */
   2871       make_aligned_word32_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP  , otag );
   2872       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+4, otag );
   2873    } else {
   2874       MC_(make_mem_undefined_w_otag) ( -VG_STACK_REDZONE_SZB + new_SP, 12, otag );
   2875    }
   2876 }
   2877 
   2878 MAYBE_USED
   2879 static void VG_REGPARM(1) mc_new_mem_stack_12(Addr new_SP)
   2880 {
   2881    PROF_EVENT(112, "new_mem_stack_12");
   2882    if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
   2883       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP );
   2884       make_aligned_word32_undefined ( -VG_STACK_REDZONE_SZB + new_SP+8 );
   2885    } else if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
   2886       /* from previous test we don't have 8-alignment at offset +0,
   2887          hence must have 8 alignment at offsets +4/-4.  Hence safe to
   2888          do 4 at +0 and then 8 at +4. */
   2889       make_aligned_word32_undefined ( -VG_STACK_REDZONE_SZB + new_SP );
   2890       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+4 );
   2891    } else {
   2892       make_mem_undefined ( -VG_STACK_REDZONE_SZB + new_SP, 12 );
   2893    }
   2894 }
   2895 
   2896 MAYBE_USED
   2897 static void VG_REGPARM(1) mc_die_mem_stack_12(Addr new_SP)
   2898 {
   2899    PROF_EVENT(122, "die_mem_stack_12");
   2900    /* Note the -12 in the test */
   2901    if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP-12 )) {
   2902       /* We have 8-alignment at -12, hence ok to do 8 at -12 and 4 at
   2903          -4. */
   2904       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-12 );
   2905       make_aligned_word32_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-4  );
   2906    } else if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
   2907       /* We have 4-alignment at +0, but we don't have 8-alignment at
   2908          -12.  So we must have 8-alignment at -8.  Hence do 4 at -12
   2909          and then 8 at -8. */
   2910       make_aligned_word32_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-12 );
   2911       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-8  );
   2912    } else {
   2913       MC_(make_mem_noaccess) ( -VG_STACK_REDZONE_SZB + new_SP-12, 12 );
   2914    }
   2915 }
   2916 
   2917 /*--------------- adjustment by 16 bytes ---------------*/
   2918 
   2919 MAYBE_USED
   2920 static void VG_REGPARM(2) mc_new_mem_stack_16_w_ECU(Addr new_SP, UInt ecu)
   2921 {
   2922    UInt otag = ecu | MC_OKIND_STACK;
   2923    PROF_EVENT(113, "new_mem_stack_16");
   2924    if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
   2925       /* Have 8-alignment at +0, hence do 8 at +0 and 8 at +8. */
   2926       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP  , otag );
   2927       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+8, otag );
   2928    } else if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
   2929       /* Have 4 alignment at +0 but not 8; hence 8 must be at +4.
   2930          Hence do 4 at +0, 8 at +4, 4 at +12. */
   2931       make_aligned_word32_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP   , otag );
   2932       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+4 , otag );
   2933       make_aligned_word32_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+12, otag );
   2934    } else {
   2935       MC_(make_mem_undefined_w_otag) ( -VG_STACK_REDZONE_SZB + new_SP, 16, otag );
   2936    }
   2937 }
   2938 
   2939 MAYBE_USED
   2940 static void VG_REGPARM(1) mc_new_mem_stack_16(Addr new_SP)
   2941 {
   2942    PROF_EVENT(113, "new_mem_stack_16");
   2943    if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
   2944       /* Have 8-alignment at +0, hence do 8 at +0 and 8 at +8. */
   2945       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP );
   2946       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+8 );
   2947    } else if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
   2948       /* Have 4 alignment at +0 but not 8; hence 8 must be at +4.
   2949          Hence do 4 at +0, 8 at +4, 4 at +12. */
   2950       make_aligned_word32_undefined ( -VG_STACK_REDZONE_SZB + new_SP );
   2951       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+4  );
   2952       make_aligned_word32_undefined ( -VG_STACK_REDZONE_SZB + new_SP+12 );
   2953    } else {
   2954       make_mem_undefined ( -VG_STACK_REDZONE_SZB + new_SP, 16 );
   2955    }
   2956 }
   2957 
   2958 MAYBE_USED
   2959 static void VG_REGPARM(1) mc_die_mem_stack_16(Addr new_SP)
   2960 {
   2961    PROF_EVENT(123, "die_mem_stack_16");
   2962    if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
   2963       /* Have 8-alignment at +0, hence do 8 at -16 and 8 at -8. */
   2964       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-16 );
   2965       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-8  );
   2966    } else if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
   2967       /* 8 alignment must be at -12.  Do 4 at -16, 8 at -12, 4 at -4. */
   2968       make_aligned_word32_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-16 );
   2969       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-12 );
   2970       make_aligned_word32_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-4  );
   2971    } else {
   2972       MC_(make_mem_noaccess) ( -VG_STACK_REDZONE_SZB + new_SP-16, 16 );
   2973    }
   2974 }
   2975 
   2976 /*--------------- adjustment by 32 bytes ---------------*/
   2977 
   2978 MAYBE_USED
   2979 static void VG_REGPARM(2) mc_new_mem_stack_32_w_ECU(Addr new_SP, UInt ecu)
   2980 {
   2981    UInt otag = ecu | MC_OKIND_STACK;
   2982    PROF_EVENT(114, "new_mem_stack_32");
   2983    if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
   2984       /* Straightforward */
   2985       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP   , otag );
   2986       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+8 , otag );
   2987       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+16, otag );
   2988       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+24, otag );
   2989    } else if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
   2990       /* 8 alignment must be at +4.  Hence do 8 at +4,+12,+20 and 4 at
   2991          +0,+28. */
   2992       make_aligned_word32_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP   , otag );
   2993       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+4 , otag );
   2994       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+12, otag );
   2995       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+20, otag );
   2996       make_aligned_word32_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+28, otag );
   2997    } else {
   2998       MC_(make_mem_undefined_w_otag) ( -VG_STACK_REDZONE_SZB + new_SP, 32, otag );
   2999    }
   3000 }
   3001 
   3002 MAYBE_USED
   3003 static void VG_REGPARM(1) mc_new_mem_stack_32(Addr new_SP)
   3004 {
   3005    PROF_EVENT(114, "new_mem_stack_32");
   3006    if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
   3007       /* Straightforward */
   3008       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP );
   3009       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+8 );
   3010       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+16 );
   3011       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+24 );
   3012    } else if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
   3013       /* 8 alignment must be at +4.  Hence do 8 at +4,+12,+20 and 4 at
   3014          +0,+28. */
   3015       make_aligned_word32_undefined ( -VG_STACK_REDZONE_SZB + new_SP );
   3016       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+4 );
   3017       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+12 );
   3018       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+20 );
   3019       make_aligned_word32_undefined ( -VG_STACK_REDZONE_SZB + new_SP+28 );
   3020    } else {
   3021       make_mem_undefined ( -VG_STACK_REDZONE_SZB + new_SP, 32 );
   3022    }
   3023 }
   3024 
   3025 MAYBE_USED
   3026 static void VG_REGPARM(1) mc_die_mem_stack_32(Addr new_SP)
   3027 {
   3028    PROF_EVENT(124, "die_mem_stack_32");
   3029    if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
   3030       /* Straightforward */
   3031       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-32 );
   3032       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-24 );
   3033       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-16 );
   3034       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP- 8 );
   3035    } else if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
   3036       /* 8 alignment must be at -4 etc.  Hence do 8 at -12,-20,-28 and
   3037          4 at -32,-4. */
   3038       make_aligned_word32_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-32 );
   3039       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-28 );
   3040       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-20 );
   3041       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-12 );
   3042       make_aligned_word32_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-4  );
   3043    } else {
   3044       MC_(make_mem_noaccess) ( -VG_STACK_REDZONE_SZB + new_SP-32, 32 );
   3045    }
   3046 }
   3047 
   3048 /*--------------- adjustment by 112 bytes ---------------*/
   3049 
   3050 MAYBE_USED
   3051 static void VG_REGPARM(2) mc_new_mem_stack_112_w_ECU(Addr new_SP, UInt ecu)
   3052 {
   3053    UInt otag = ecu | MC_OKIND_STACK;
   3054    PROF_EVENT(115, "new_mem_stack_112");
   3055    if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
   3056       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP   , otag );
   3057       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+8 , otag );
   3058       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+16, otag );
   3059       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+24, otag );
   3060       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+32, otag );
   3061       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+40, otag );
   3062       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+48, otag );
   3063       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+56, otag );
   3064       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+64, otag );
   3065       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+72, otag );
   3066       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+80, otag );
   3067       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+88, otag );
   3068       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+96, otag );
   3069       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+104, otag );
   3070    } else {
   3071       MC_(make_mem_undefined_w_otag) ( -VG_STACK_REDZONE_SZB + new_SP, 112, otag );
   3072    }
   3073 }
   3074 
   3075 MAYBE_USED
   3076 static void VG_REGPARM(1) mc_new_mem_stack_112(Addr new_SP)
   3077 {
   3078    PROF_EVENT(115, "new_mem_stack_112");
   3079    if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
   3080       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP );
   3081       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+8 );
   3082       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+16 );
   3083       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+24 );
   3084       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+32 );
   3085       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+40 );
   3086       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+48 );
   3087       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+56 );
   3088       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+64 );
   3089       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+72 );
   3090       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+80 );
   3091       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+88 );
   3092       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+96 );
   3093       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+104 );
   3094    } else {
   3095       make_mem_undefined ( -VG_STACK_REDZONE_SZB + new_SP, 112 );
   3096    }
   3097 }
   3098 
   3099 MAYBE_USED
   3100 static void VG_REGPARM(1) mc_die_mem_stack_112(Addr new_SP)
   3101 {
   3102    PROF_EVENT(125, "die_mem_stack_112");
   3103    if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
   3104       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-112);
   3105       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-104);
   3106       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-96 );
   3107       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-88 );
   3108       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-80 );
   3109       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-72 );
   3110       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-64 );
   3111       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-56 );
   3112       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-48 );
   3113       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-40 );
   3114       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-32 );
   3115       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-24 );
   3116       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-16 );
   3117       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP- 8 );
   3118    } else {
   3119       MC_(make_mem_noaccess) ( -VG_STACK_REDZONE_SZB + new_SP-112, 112 );
   3120    }
   3121 }
   3122 
   3123 /*--------------- adjustment by 128 bytes ---------------*/
   3124 
   3125 MAYBE_USED
   3126 static void VG_REGPARM(2) mc_new_mem_stack_128_w_ECU(Addr new_SP, UInt ecu)
   3127 {
   3128    UInt otag = ecu | MC_OKIND_STACK;
   3129    PROF_EVENT(116, "new_mem_stack_128");
   3130    if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
   3131       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP   , otag );
   3132       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+8 , otag );
   3133       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+16, otag );
   3134       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+24, otag );
   3135       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+32, otag );
   3136       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+40, otag );
   3137       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+48, otag );
   3138       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+56, otag );
   3139       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+64, otag );
   3140       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+72, otag );
   3141       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+80, otag );
   3142       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+88, otag );
   3143       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+96, otag );
   3144       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+104, otag );
   3145       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+112, otag );
   3146       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+120, otag );
   3147    } else {
   3148       MC_(make_mem_undefined_w_otag) ( -VG_STACK_REDZONE_SZB + new_SP, 128, otag );
   3149    }
   3150 }
   3151 
   3152 MAYBE_USED
   3153 static void VG_REGPARM(1) mc_new_mem_stack_128(Addr new_SP)
   3154 {
   3155    PROF_EVENT(116, "new_mem_stack_128");
   3156    if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
   3157       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP );
   3158       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+8 );
   3159       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+16 );
   3160       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+24 );
   3161       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+32 );
   3162       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+40 );
   3163       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+48 );
   3164       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+56 );
   3165       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+64 );
   3166       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+72 );
   3167       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+80 );
   3168       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+88 );
   3169       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+96 );
   3170       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+104 );
   3171       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+112 );
   3172       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+120 );
   3173    } else {
   3174       make_mem_undefined ( -VG_STACK_REDZONE_SZB + new_SP, 128 );
   3175    }
   3176 }
   3177 
   3178 MAYBE_USED
   3179 static void VG_REGPARM(1) mc_die_mem_stack_128(Addr new_SP)
   3180 {
   3181    PROF_EVENT(126, "die_mem_stack_128");
   3182    if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
   3183       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-128);
   3184       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-120);
   3185       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-112);
   3186       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-104);
   3187       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-96 );
   3188       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-88 );
   3189       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-80 );
   3190       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-72 );
   3191       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-64 );
   3192       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-56 );
   3193       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-48 );
   3194       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-40 );
   3195       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-32 );
   3196       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-24 );
   3197       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-16 );
   3198       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP- 8 );
   3199    } else {
   3200       MC_(make_mem_noaccess) ( -VG_STACK_REDZONE_SZB + new_SP-128, 128 );
   3201    }
   3202 }
   3203 
   3204 /*--------------- adjustment by 144 bytes ---------------*/
   3205 
   3206 MAYBE_USED
   3207 static void VG_REGPARM(2) mc_new_mem_stack_144_w_ECU(Addr new_SP, UInt ecu)
   3208 {
   3209    UInt otag = ecu | MC_OKIND_STACK;
   3210    PROF_EVENT(117, "new_mem_stack_144");
   3211    if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
   3212       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP,     otag );
   3213       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+8,   otag );
   3214       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+16,  otag );
   3215       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+24,  otag );
   3216       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+32,  otag );
   3217       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+40,  otag );
   3218       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+48,  otag );
   3219       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+56,  otag );
   3220       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+64,  otag );
   3221       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+72,  otag );
   3222       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+80,  otag );
   3223       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+88,  otag );
   3224       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+96,  otag );
   3225       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+104, otag );
   3226       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+112, otag );
   3227       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+120, otag );
   3228       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+128, otag );
   3229       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+136, otag );
   3230    } else {
   3231       MC_(make_mem_undefined_w_otag) ( -VG_STACK_REDZONE_SZB + new_SP, 144, otag );
   3232    }
   3233 }
   3234 
   3235 MAYBE_USED
   3236 static void VG_REGPARM(1) mc_new_mem_stack_144(Addr new_SP)
   3237 {
   3238    PROF_EVENT(117, "new_mem_stack_144");
   3239    if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
   3240       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP );
   3241       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+8 );
   3242       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+16 );
   3243       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+24 );
   3244       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+32 );
   3245       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+40 );
   3246       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+48 );
   3247       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+56 );
   3248       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+64 );
   3249       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+72 );
   3250       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+80 );
   3251       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+88 );
   3252       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+96 );
   3253       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+104 );
   3254       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+112 );
   3255       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+120 );
   3256       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+128 );
   3257       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+136 );
   3258    } else {
   3259       make_mem_undefined ( -VG_STACK_REDZONE_SZB + new_SP, 144 );
   3260    }
   3261 }
   3262 
   3263 MAYBE_USED
   3264 static void VG_REGPARM(1) mc_die_mem_stack_144(Addr new_SP)
   3265 {
   3266    PROF_EVENT(127, "die_mem_stack_144");
   3267    if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
   3268       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-144);
   3269       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-136);
   3270       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-128);
   3271       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-120);
   3272       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-112);
   3273       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-104);
   3274       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-96 );
   3275       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-88 );
   3276       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-80 );
   3277       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-72 );
   3278       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-64 );
   3279       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-56 );
   3280       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-48 );
   3281       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-40 );
   3282       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-32 );
   3283       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-24 );
   3284       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-16 );
   3285       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP- 8 );
   3286    } else {
   3287       MC_(make_mem_noaccess) ( -VG_STACK_REDZONE_SZB + new_SP-144, 144 );
   3288    }
   3289 }
   3290 
   3291 /*--------------- adjustment by 160 bytes ---------------*/
   3292 
   3293 MAYBE_USED
   3294 static void VG_REGPARM(2) mc_new_mem_stack_160_w_ECU(Addr new_SP, UInt ecu)
   3295 {
   3296    UInt otag = ecu | MC_OKIND_STACK;
   3297    PROF_EVENT(118, "new_mem_stack_160");
   3298    if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
   3299       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP,     otag );
   3300       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+8,   otag );
   3301       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+16,  otag );
   3302       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+24,  otag );
   3303       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+32,  otag );
   3304       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+40,  otag );
   3305       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+48,  otag );
   3306       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+56,  otag );
   3307       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+64,  otag );
   3308       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+72,  otag );
   3309       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+80,  otag );
   3310       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+88,  otag );
   3311       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+96,  otag );
   3312       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+104, otag );
   3313       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+112, otag );
   3314       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+120, otag );
   3315       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+128, otag );
   3316       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+136, otag );
   3317       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+144, otag );
   3318       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+152, otag );
   3319    } else {
   3320       MC_(make_mem_undefined_w_otag) ( -VG_STACK_REDZONE_SZB + new_SP, 160, otag );
   3321    }
   3322 }
   3323 
   3324 MAYBE_USED
   3325 static void VG_REGPARM(1) mc_new_mem_stack_160(Addr new_SP)
   3326 {
   3327    PROF_EVENT(118, "new_mem_stack_160");
   3328    if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
   3329       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP );
   3330       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+8 );
   3331       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+16 );
   3332       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+24 );
   3333       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+32 );
   3334       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+40 );
   3335       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+48 );
   3336       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+56 );
   3337       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+64 );
   3338       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+72 );
   3339       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+80 );
   3340       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+88 );
   3341       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+96 );
   3342       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+104 );
   3343       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+112 );
   3344       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+120 );
   3345       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+128 );
   3346       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+136 );
   3347       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+144 );
   3348       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+152 );
   3349    } else {
   3350       make_mem_undefined ( -VG_STACK_REDZONE_SZB + new_SP, 160 );
   3351    }
   3352 }
   3353 
   3354 MAYBE_USED
   3355 static void VG_REGPARM(1) mc_die_mem_stack_160(Addr new_SP)
   3356 {
   3357    PROF_EVENT(128, "die_mem_stack_160");
   3358    if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
   3359       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-160);
   3360       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-152);
   3361       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-144);
   3362       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-136);
   3363       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-128);
   3364       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-120);
   3365       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-112);
   3366       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-104);
   3367       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-96 );
   3368       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-88 );
   3369       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-80 );
   3370       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-72 );
   3371       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-64 );
   3372       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-56 );
   3373       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-48 );
   3374       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-40 );
   3375       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-32 );
   3376       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-24 );
   3377       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-16 );
   3378       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP- 8 );
   3379    } else {
   3380       MC_(make_mem_noaccess) ( -VG_STACK_REDZONE_SZB + new_SP-160, 160 );
   3381    }
   3382 }
   3383 
   3384 /*--------------- adjustment by N bytes ---------------*/
   3385 
   3386 static void mc_new_mem_stack_w_ECU ( Addr a, SizeT len, UInt ecu )
   3387 {
   3388    UInt otag = ecu | MC_OKIND_STACK;
   3389    PROF_EVENT(115, "new_mem_stack_w_otag");
   3390    MC_(make_mem_undefined_w_otag) ( -VG_STACK_REDZONE_SZB + a, len, otag );
   3391 }
   3392 
   3393 static void mc_new_mem_stack ( Addr a, SizeT len )
   3394 {
   3395    PROF_EVENT(115, "new_mem_stack");
   3396    make_mem_undefined ( -VG_STACK_REDZONE_SZB + a, len );
   3397 }
   3398 
   3399 static void mc_die_mem_stack ( Addr a, SizeT len )
   3400 {
   3401    PROF_EVENT(125, "die_mem_stack");
   3402    MC_(make_mem_noaccess) ( -VG_STACK_REDZONE_SZB + a, len );
   3403 }
   3404 
   3405 
   3406 /* The AMD64 ABI says:
   3407 
   3408    "The 128-byte area beyond the location pointed to by %rsp is considered
   3409     to be reserved and shall not be modified by signal or interrupt
   3410     handlers.  Therefore, functions may use this area for temporary data
   3411     that is not needed across function calls.  In particular, leaf functions
   3412     may use this area for their entire stack frame, rather than adjusting
   3413     the stack pointer in the prologue and epilogue.  This area is known as
   3414     red zone [sic]."
   3415 
   3416    So after any call or return we need to mark this redzone as containing
   3417    undefined values.
   3418 
   3419    Consider this:  we're in function f.  f calls g.  g moves rsp down
   3420    modestly (say 16 bytes) and writes stuff all over the red zone, making it
   3421    defined.  g returns.  f is buggy and reads from parts of the red zone
   3422    that it didn't write on.  But because g filled that area in, f is going
   3423    to be picking up defined V bits and so any errors from reading bits of
   3424    the red zone it didn't write, will be missed.  The only solution I could
   3425    the red zone it didn't write will be missed.  The only solution I could
   3426 
   3427    This is in accordance with the ABI, which makes it clear the redzone
   3428    is volatile across function calls.
   3429 
   3430    The problem occurs the other way round too: f could fill the RZ up
   3431    with defined values and g could mistakenly read them.  So the RZ
   3432    also needs to be nuked on function calls.
   3433 */
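        /* Illustrative sketch of the scenario described above (hypothetical
           client code, not taken from any real program):

              void g(void) {
                 char buf[16];          // occupies what was f's red zone
                 fill(buf);             // makes those bytes defined
              }
              void f(void) {
                 g();
                 // bug: f now reads below its own frame.  Unless the red
                 // zone is re-marked undefined when g returns, Memcheck
                 // sees the defined V bits g left behind and stays silent.
              }
        */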
   3434 
   3435 
   3436 /* Here's a simple cache to hold nia -> ECU mappings.  It could be
   3437    improved so as to have a lower miss rate. */
   3438 
   3439 static UWord stats__nia_cache_queries = 0;
   3440 static UWord stats__nia_cache_misses  = 0;
   3441 
   3442 typedef
   3443    struct { UWord nia0; UWord ecu0;   /* nia0 maps to ecu0 */
   3444             UWord nia1; UWord ecu1; } /* nia1 maps to ecu1 */
   3445    WCacheEnt;
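        /* Each slot of the cache is thus a tiny 2-entry set: (nia0,ecu0) is
           the most recently used mapping and (nia1,ecu1) the runner-up.  A
           hit on the second entry swaps the pair, and a miss pushes entry 0
           down into entry 1 (see convert_nia_to_ecu below). */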
   3446 
   3447 #define N_NIA_TO_ECU_CACHE 511
   3448 
   3449 static WCacheEnt nia_to_ecu_cache[N_NIA_TO_ECU_CACHE];
   3450 
   3451 static void init_nia_to_ecu_cache ( void )
   3452 {
   3453    UWord       i;
   3454    Addr        zero_addr = 0;
   3455    ExeContext* zero_ec;
   3456    UInt        zero_ecu;
   3457    /* Fill all the slots with an entry for address zero, and the
   3458       relevant otags accordingly.  Hence the cache is initially filled
   3459       with valid data. */
   3460    zero_ec = VG_(make_depth_1_ExeContext_from_Addr)(zero_addr);
   3461    tl_assert(zero_ec);
   3462    zero_ecu = VG_(get_ECU_from_ExeContext)(zero_ec);
   3463    tl_assert(VG_(is_plausible_ECU)(zero_ecu));
   3464    for (i = 0; i < N_NIA_TO_ECU_CACHE; i++) {
   3465       nia_to_ecu_cache[i].nia0 = zero_addr;
   3466       nia_to_ecu_cache[i].ecu0 = zero_ecu;
   3467       nia_to_ecu_cache[i].nia1 = zero_addr;
   3468       nia_to_ecu_cache[i].ecu1 = zero_ecu;
   3469    }
   3470 }
   3471 
   3472 static inline UInt convert_nia_to_ecu ( Addr nia )
   3473 {
   3474    UWord i;
   3475    UInt        ecu;
   3476    ExeContext* ec;
   3477 
   3478    tl_assert( sizeof(nia_to_ecu_cache[0].nia1) == sizeof(nia) );
   3479 
   3480    stats__nia_cache_queries++;
   3481    i = nia % N_NIA_TO_ECU_CACHE;
   3482    tl_assert(i >= 0 && i < N_NIA_TO_ECU_CACHE);
   3483 
   3484    if (LIKELY( nia_to_ecu_cache[i].nia0 == nia ))
   3485       return nia_to_ecu_cache[i].ecu0;
   3486 
   3487    if (LIKELY( nia_to_ecu_cache[i].nia1 == nia )) {
   3488 #     define SWAP(_w1,_w2) { UWord _t = _w1; _w1 = _w2; _w2 = _t; }
   3489       SWAP( nia_to_ecu_cache[i].nia0, nia_to_ecu_cache[i].nia1 );
   3490       SWAP( nia_to_ecu_cache[i].ecu0, nia_to_ecu_cache[i].ecu1 );
   3491 #     undef SWAP
   3492       return nia_to_ecu_cache[i].ecu0;
   3493    }
   3494 
   3495    stats__nia_cache_misses++;
   3496    ec = VG_(make_depth_1_ExeContext_from_Addr)(nia);
   3497    tl_assert(ec);
   3498    ecu = VG_(get_ECU_from_ExeContext)(ec);
   3499    tl_assert(VG_(is_plausible_ECU)(ecu));
   3500 
   3501    nia_to_ecu_cache[i].nia1 = nia_to_ecu_cache[i].nia0;
   3502    nia_to_ecu_cache[i].ecu1 = nia_to_ecu_cache[i].ecu0;
   3503 
   3504    nia_to_ecu_cache[i].nia0 = nia;
   3505    nia_to_ecu_cache[i].ecu0 = (UWord)ecu;
   3506    return ecu;
   3507 }
   3508 
   3509 
   3510 /* Note that this serves both the origin-tracking and
   3511    no-origin-tracking modes.  We assume that calls to it are
   3512    sufficiently infrequent that it isn't worth specialising for the
   3513    with/without origin-tracking cases. */
   3514 void MC_(helperc_MAKE_STACK_UNINIT) ( Addr base, UWord len, Addr nia )
   3515 {
   3516    UInt otag;
   3517    tl_assert(sizeof(UWord) == sizeof(SizeT));
   3518    if (0)
   3519       VG_(printf)("helperc_MAKE_STACK_UNINIT (%#lx,%lu,nia=%#lx)\n",
   3520                   base, len, nia );
   3521 
   3522    if (UNLIKELY( MC_(clo_mc_level) == 3 )) {
   3523       UInt ecu = convert_nia_to_ecu ( nia );
   3524       tl_assert(VG_(is_plausible_ECU)(ecu));
   3525       otag = ecu | MC_OKIND_STACK;
   3526    } else {
   3527       tl_assert(nia == 0);
   3528       otag = 0;
   3529    }
   3530 
   3531 #  if 0
   3532    /* Really slow version */
   3533    MC_(make_mem_undefined_w_otag)(base, len, otag);
   3534 #  endif
   3535 
   3536 #  if 0
   3537    /* Slow(ish) version, which is fairly easily seen to be correct.
   3538    */
   3539    if (LIKELY( VG_IS_8_ALIGNED(base) && len==128 )) {
   3540       make_aligned_word64_undefined_w_otag(base +   0, otag);
   3541       make_aligned_word64_undefined_w_otag(base +   8, otag);
   3542       make_aligned_word64_undefined_w_otag(base +  16, otag);
   3543       make_aligned_word64_undefined_w_otag(base +  24, otag);
   3544 
   3545       make_aligned_word64_undefined_w_otag(base +  32, otag);
   3546       make_aligned_word64_undefined_w_otag(base +  40, otag);
   3547       make_aligned_word64_undefined_w_otag(base +  48, otag);
   3548       make_aligned_word64_undefined_w_otag(base +  56, otag);
   3549 
   3550       make_aligned_word64_undefined_w_otag(base +  64, otag);
   3551       make_aligned_word64_undefined_w_otag(base +  72, otag);
   3552       make_aligned_word64_undefined_w_otag(base +  80, otag);
   3553       make_aligned_word64_undefined_w_otag(base +  88, otag);
   3554 
   3555       make_aligned_word64_undefined_w_otag(base +  96, otag);
   3556       make_aligned_word64_undefined_w_otag(base + 104, otag);
   3557       make_aligned_word64_undefined_w_otag(base + 112, otag);
   3558       make_aligned_word64_undefined_w_otag(base + 120, otag);
   3559    } else {
   3560       MC_(make_mem_undefined)(base, len, otag);
   3561    }
   3562 #  endif
   3563 
   3564    /* Idea is: go fast when
   3565          * 8-aligned and length is 128
   3566          * the sm is available in the main primary map
   3567          * the address range falls entirely within a single secondary map
   3568       If all those conditions hold, just update the V+A bits by writing
   3569       directly into the vabits array.  (If the sm was distinguished, this
   3570       will make a copy and then write to it.)
   3571    */
   3572 
   3573    if (LIKELY( len == 128 && VG_IS_8_ALIGNED(base) )) {
   3574       /* Now we know the address range is suitably sized and aligned. */
   3575       UWord a_lo = (UWord)(base);
   3576       UWord a_hi = (UWord)(base + 128 - 1);
   3577       tl_assert(a_lo < a_hi);             // paranoia: detect overflow
   3578       if (a_hi <= MAX_PRIMARY_ADDRESS) {
   3579          // Now we know the entire range is within the main primary map.
   3580          SecMap* sm    = get_secmap_for_writing_low(a_lo);
   3581          SecMap* sm_hi = get_secmap_for_writing_low(a_hi);
   3582          /* Now we know that the entire address range falls within a
   3583             single secondary map, and that that secondary 'lives' in
   3584             the main primary map. */
   3585          if (LIKELY(sm == sm_hi)) {
   3586             // Finally, we know that the range is entirely within one secmap.
   3587             UWord   v_off = SM_OFF(a_lo);
   3588             UShort* p     = (UShort*)(&sm->vabits8[v_off]);
   3589             p[ 0] = VA_BITS16_UNDEFINED;
   3590             p[ 1] = VA_BITS16_UNDEFINED;
   3591             p[ 2] = VA_BITS16_UNDEFINED;
   3592             p[ 3] = VA_BITS16_UNDEFINED;
   3593             p[ 4] = VA_BITS16_UNDEFINED;
   3594             p[ 5] = VA_BITS16_UNDEFINED;
   3595             p[ 6] = VA_BITS16_UNDEFINED;
   3596             p[ 7] = VA_BITS16_UNDEFINED;
   3597             p[ 8] = VA_BITS16_UNDEFINED;
   3598             p[ 9] = VA_BITS16_UNDEFINED;
   3599             p[10] = VA_BITS16_UNDEFINED;
   3600             p[11] = VA_BITS16_UNDEFINED;
   3601             p[12] = VA_BITS16_UNDEFINED;
   3602             p[13] = VA_BITS16_UNDEFINED;
   3603             p[14] = VA_BITS16_UNDEFINED;
   3604             p[15] = VA_BITS16_UNDEFINED;
   3605             if (UNLIKELY( MC_(clo_mc_level) == 3 )) {
   3606                set_aligned_word64_Origin_to_undef( base + 8 * 0, otag );
   3607                set_aligned_word64_Origin_to_undef( base + 8 * 1, otag );
   3608                set_aligned_word64_Origin_to_undef( base + 8 * 2, otag );
   3609                set_aligned_word64_Origin_to_undef( base + 8 * 3, otag );
   3610                set_aligned_word64_Origin_to_undef( base + 8 * 4, otag );
   3611                set_aligned_word64_Origin_to_undef( base + 8 * 5, otag );
   3612                set_aligned_word64_Origin_to_undef( base + 8 * 6, otag );
   3613                set_aligned_word64_Origin_to_undef( base + 8 * 7, otag );
   3614                set_aligned_word64_Origin_to_undef( base + 8 * 8, otag );
   3615                set_aligned_word64_Origin_to_undef( base + 8 * 9, otag );
   3616                set_aligned_word64_Origin_to_undef( base + 8 * 10, otag );
   3617                set_aligned_word64_Origin_to_undef( base + 8 * 11, otag );
   3618                set_aligned_word64_Origin_to_undef( base + 8 * 12, otag );
   3619                set_aligned_word64_Origin_to_undef( base + 8 * 13, otag );
   3620                set_aligned_word64_Origin_to_undef( base + 8 * 14, otag );
   3621                set_aligned_word64_Origin_to_undef( base + 8 * 15, otag );
   3622             }
   3623             return;
   3624          }
   3625       }
   3626    }
   3627 
   3628    /* 288 bytes (36 ULongs) is the magic value for ELF ppc64. */
   3629    if (LIKELY( len == 288 && VG_IS_8_ALIGNED(base) )) {
   3630       /* Now we know the address range is suitably sized and aligned. */
   3631       UWord a_lo = (UWord)(base);
   3632       UWord a_hi = (UWord)(base + 288 - 1);
   3633       tl_assert(a_lo < a_hi);             // paranoia: detect overflow
   3634       if (a_hi <= MAX_PRIMARY_ADDRESS) {
   3635          // Now we know the entire range is within the main primary map.
   3636          SecMap* sm    = get_secmap_for_writing_low(a_lo);
   3637          SecMap* sm_hi = get_secmap_for_writing_low(a_hi);
   3638          /* Now we know that the entire address range falls within a
   3639             single secondary map, and that that secondary 'lives' in
   3640             the main primary map. */
   3641          if (LIKELY(sm == sm_hi)) {
   3642             // Finally, we know that the range is entirely within one secmap.
   3643             UWord   v_off = SM_OFF(a_lo);
   3644             UShort* p     = (UShort*)(&sm->vabits8[v_off]);
   3645             p[ 0] = VA_BITS16_UNDEFINED;
   3646             p[ 1] = VA_BITS16_UNDEFINED;
   3647             p[ 2] = VA_BITS16_UNDEFINED;
   3648             p[ 3] = VA_BITS16_UNDEFINED;
   3649             p[ 4] = VA_BITS16_UNDEFINED;
   3650             p[ 5] = VA_BITS16_UNDEFINED;
   3651             p[ 6] = VA_BITS16_UNDEFINED;
   3652             p[ 7] = VA_BITS16_UNDEFINED;
   3653             p[ 8] = VA_BITS16_UNDEFINED;
   3654             p[ 9] = VA_BITS16_UNDEFINED;
   3655             p[10] = VA_BITS16_UNDEFINED;
   3656             p[11] = VA_BITS16_UNDEFINED;
   3657             p[12] = VA_BITS16_UNDEFINED;
   3658             p[13] = VA_BITS16_UNDEFINED;
   3659             p[14] = VA_BITS16_UNDEFINED;
   3660             p[15] = VA_BITS16_UNDEFINED;
   3661             p[16] = VA_BITS16_UNDEFINED;
   3662             p[17] = VA_BITS16_UNDEFINED;
   3663             p[18] = VA_BITS16_UNDEFINED;
   3664             p[19] = VA_BITS16_UNDEFINED;
   3665             p[20] = VA_BITS16_UNDEFINED;
   3666             p[21] = VA_BITS16_UNDEFINED;
   3667             p[22] = VA_BITS16_UNDEFINED;
   3668             p[23] = VA_BITS16_UNDEFINED;
   3669             p[24] = VA_BITS16_UNDEFINED;
   3670             p[25] = VA_BITS16_UNDEFINED;
   3671             p[26] = VA_BITS16_UNDEFINED;
   3672             p[27] = VA_BITS16_UNDEFINED;
   3673             p[28] = VA_BITS16_UNDEFINED;
   3674             p[29] = VA_BITS16_UNDEFINED;
   3675             p[30] = VA_BITS16_UNDEFINED;
   3676             p[31] = VA_BITS16_UNDEFINED;
   3677             p[32] = VA_BITS16_UNDEFINED;
   3678             p[33] = VA_BITS16_UNDEFINED;
   3679             p[34] = VA_BITS16_UNDEFINED;
   3680             p[35] = VA_BITS16_UNDEFINED;
   3681             if (UNLIKELY( MC_(clo_mc_level) == 3 )) {
   3682                set_aligned_word64_Origin_to_undef( base + 8 * 0, otag );
   3683                set_aligned_word64_Origin_to_undef( base + 8 * 1, otag );
   3684                set_aligned_word64_Origin_to_undef( base + 8 * 2, otag );
   3685                set_aligned_word64_Origin_to_undef( base + 8 * 3, otag );
   3686                set_aligned_word64_Origin_to_undef( base + 8 * 4, otag );
   3687                set_aligned_word64_Origin_to_undef( base + 8 * 5, otag );
   3688                set_aligned_word64_Origin_to_undef( base + 8 * 6, otag );
   3689                set_aligned_word64_Origin_to_undef( base + 8 * 7, otag );
   3690                set_aligned_word64_Origin_to_undef( base + 8 * 8, otag );
   3691                set_aligned_word64_Origin_to_undef( base + 8 * 9, otag );
   3692                set_aligned_word64_Origin_to_undef( base + 8 * 10, otag );
   3693                set_aligned_word64_Origin_to_undef( base + 8 * 11, otag );
   3694                set_aligned_word64_Origin_to_undef( base + 8 * 12, otag );
   3695                set_aligned_word64_Origin_to_undef( base + 8 * 13, otag );
   3696                set_aligned_word64_Origin_to_undef( base + 8 * 14, otag );
   3697                set_aligned_word64_Origin_to_undef( base + 8 * 15, otag );
   3698                set_aligned_word64_Origin_to_undef( base + 8 * 16, otag );
   3699                set_aligned_word64_Origin_to_undef( base + 8 * 17, otag );
   3700                set_aligned_word64_Origin_to_undef( base + 8 * 18, otag );
   3701                set_aligned_word64_Origin_to_undef( base + 8 * 19, otag );
   3702                set_aligned_word64_Origin_to_undef( base + 8 * 20, otag );
   3703                set_aligned_word64_Origin_to_undef( base + 8 * 21, otag );
   3704                set_aligned_word64_Origin_to_undef( base + 8 * 22, otag );
   3705                set_aligned_word64_Origin_to_undef( base + 8 * 23, otag );
   3706                set_aligned_word64_Origin_to_undef( base + 8 * 24, otag );
   3707                set_aligned_word64_Origin_to_undef( base + 8 * 25, otag );
   3708                set_aligned_word64_Origin_to_undef( base + 8 * 26, otag );
   3709                set_aligned_word64_Origin_to_undef( base + 8 * 27, otag );
   3710                set_aligned_word64_Origin_to_undef( base + 8 * 28, otag );
   3711                set_aligned_word64_Origin_to_undef( base + 8 * 29, otag );
   3712                set_aligned_word64_Origin_to_undef( base + 8 * 30, otag );
   3713                set_aligned_word64_Origin_to_undef( base + 8 * 31, otag );
   3714                set_aligned_word64_Origin_to_undef( base + 8 * 32, otag );
   3715                set_aligned_word64_Origin_to_undef( base + 8 * 33, otag );
   3716                set_aligned_word64_Origin_to_undef( base + 8 * 34, otag );
   3717                set_aligned_word64_Origin_to_undef( base + 8 * 35, otag );
   3718             }
   3719             return;
   3720          }
   3721       }
   3722    }
   3723 
   3724    /* else fall into slow case */
   3725    MC_(make_mem_undefined_w_otag)(base, len, otag);
   3726 }
   3727 
   3728 
   3729 /*------------------------------------------------------------*/
   3730 /*--- Checking memory                                      ---*/
   3731 /*------------------------------------------------------------*/
   3732 
   3733 typedef
   3734    enum {
   3735       MC_Ok = 5,
   3736       MC_AddrErr = 6,
   3737       MC_ValueErr = 7
   3738    }
   3739    MC_ReadResult;
   3740 
   3741 
   3742 /* Check permissions for address range.  If inadequate permissions
   3743    exist, *bad_addr is set to the offending address, so the caller can
   3744    know what it is. */
   3745 
   3746 /* Returns True if [a .. a+len) is not addressible.  Otherwise,
   3747    returns False, and if bad_addr is non-NULL, sets *bad_addr to
   3748    indicate the lowest failing address.  Functions below are
   3749    similar. */
   3750 Bool MC_(check_mem_is_noaccess) ( Addr a, SizeT len, Addr* bad_addr )
   3751 {
   3752    SizeT i;
   3753    UWord vabits2;
   3754 
   3755    PROF_EVENT(60, "check_mem_is_noaccess");
   3756    for (i = 0; i < len; i++) {
   3757       PROF_EVENT(61, "check_mem_is_noaccess(loop)");
   3758       vabits2 = get_vabits2(a);
   3759       if (VA_BITS2_NOACCESS != vabits2) {
   3760          if (bad_addr != NULL) *bad_addr = a;
   3761          return False;
   3762       }
   3763       a++;
   3764    }
   3765    return True;
   3766 }
   3767 
   3768 static Bool is_mem_addressable ( Addr a, SizeT len,
   3769                                  /*OUT*/Addr* bad_addr )
   3770 {
   3771    SizeT i;
   3772    UWord vabits2;
   3773 
   3774    PROF_EVENT(62, "is_mem_addressable");
   3775    for (i = 0; i < len; i++) {
   3776       PROF_EVENT(63, "is_mem_addressable(loop)");
   3777       vabits2 = get_vabits2(a);
   3778       if (VA_BITS2_NOACCESS == vabits2) {
   3779          if (bad_addr != NULL) *bad_addr = a;
   3780          return False;
   3781       }
   3782       a++;
   3783    }
   3784    return True;
   3785 }
   3786 
   3787 static MC_ReadResult is_mem_defined ( Addr a, SizeT len,
   3788                                       /*OUT*/Addr* bad_addr,
   3789                                       /*OUT*/UInt* otag )
   3790 {
   3791    SizeT i;
   3792    UWord vabits2;
   3793 
   3794    PROF_EVENT(64, "is_mem_defined");
   3795    DEBUG("is_mem_defined\n");
   3796 
   3797    if (otag)     *otag = 0;
   3798    if (bad_addr) *bad_addr = 0;
   3799    for (i = 0; i < len; i++) {
   3800       PROF_EVENT(65, "is_mem_defined(loop)");
   3801       vabits2 = get_vabits2(a);
   3802       if (VA_BITS2_DEFINED != vabits2) {
   3803          // Error!  Nb: Report addressability errors in preference to
   3804          // definedness errors.  And don't report definedness errors unless
   3805          // --undef-value-errors=yes.
   3806          if (bad_addr) {
   3807             *bad_addr = a;
   3808          }
   3809          if (VA_BITS2_NOACCESS == vabits2) {
   3810             return MC_AddrErr;
   3811          }
   3812          if (MC_(clo_mc_level) >= 2) {
   3813             if (otag && MC_(clo_mc_level) == 3) {
   3814                *otag = MC_(helperc_b_load1)( a );
   3815             }
   3816             return MC_ValueErr;
   3817          }
   3818       }
   3819       a++;
   3820    }
   3821    return MC_Ok;
   3822 }
   3823 
   3824 
   3825 /* Like is_mem_defined but doesn't give up at the first uninitialised
   3826    byte -- the entire range is always checked.  This is important for
   3827    detecting errors in the case where a checked range strays into
   3828    invalid memory, but that fact is not detected by the ordinary
   3829    is_mem_defined(), because of an undefined section that precedes the
   3830    out of range section, possibly as a result of an alignment hole in
   3831    the checked data.  This version always checks the entire range and
   3832    can report both a definedness and an accessibility error, if
   3833    necessary. */
   3834 static void is_mem_defined_comprehensive (
   3835                Addr a, SizeT len,
   3836                /*OUT*/Bool* errorV,    /* is there a definedness err? */
   3837                /*OUT*/Addr* bad_addrV, /* if so where? */
   3838                /*OUT*/UInt* otagV,     /* and what's its otag? */
   3839                /*OUT*/Bool* errorA,    /* is there an addressability err? */
   3840                /*OUT*/Addr* bad_addrA  /* if so where? */
   3841             )
   3842 {
   3843    SizeT i;
   3844    UWord vabits2;
   3845    Bool  already_saw_errV = False;
   3846 
   3847    PROF_EVENT(64, "is_mem_defined"); // fixme
   3848    DEBUG("is_mem_defined_comprehensive\n");
   3849 
   3850    tl_assert(!(*errorV || *errorA));
   3851 
   3852    for (i = 0; i < len; i++) {
   3853       PROF_EVENT(65, "is_mem_defined(loop)"); // fixme
   3854       vabits2 = get_vabits2(a);
   3855       switch (vabits2) {
   3856          case VA_BITS2_DEFINED:
   3857             a++;
   3858             break;
   3859          case VA_BITS2_UNDEFINED:
   3860          case VA_BITS2_PARTDEFINED:
   3861             if (!already_saw_errV) {
   3862                *errorV    = True;
   3863                *bad_addrV = a;
   3864                if (MC_(clo_mc_level) == 3) {
   3865                   *otagV = MC_(helperc_b_load1)( a );
   3866                } else {
   3867                   *otagV = 0;
   3868                }
   3869                already_saw_errV = True;
   3870             }
   3871             a++; /* keep going */
   3872             break;
   3873          case VA_BITS2_NOACCESS:
   3874             *errorA    = True;
   3875             *bad_addrA = a;
   3876             return; /* give up now. */
   3877          default:
   3878             tl_assert(0);
   3879       }
   3880    }
   3881 }
   3882 
   3883 
   3884 /* Check a zero-terminated ascii string.  Tricky -- don't want to
   3885    examine the actual bytes, to find the end, until we're sure it is
   3886    safe to do so. */
   3887 
   3888 static MC_ReadResult mc_is_defined_asciiz ( Addr a, Addr* bad_addr, UInt* otag )
   3889 {
   3890    UWord vabits2;
   3891 
   3892    PROF_EVENT(66, "mc_is_defined_asciiz");
   3893    DEBUG("mc_is_defined_asciiz\n");
   3894 
   3895    if (otag)     *otag = 0;
   3896    if (bad_addr) *bad_addr = 0;
   3897    while (True) {
   3898       PROF_EVENT(67, "mc_is_defined_asciiz(loop)");
   3899       vabits2 = get_vabits2(a);
   3900       if (VA_BITS2_DEFINED != vabits2) {
   3901          // Error!  Nb: Report addressability errors in preference to
   3902          // definedness errors.  And don't report definedness errors unless
   3903          // --undef-value-errors=yes.
   3904          if (bad_addr) {
   3905             *bad_addr = a;
   3906          }
   3907          if (VA_BITS2_NOACCESS == vabits2) {
   3908             return MC_AddrErr;
   3909          }
   3910          if (MC_(clo_mc_level) >= 2) {
   3911             if (otag && MC_(clo_mc_level) == 3) {
   3912                *otag = MC_(helperc_b_load1)( a );
   3913             }
   3914             return MC_ValueErr;
   3915          }
   3916       }
   3917       /* Ok, a is safe to read. */
   3918       if (* ((UChar*)a) == 0) {
   3919          return MC_Ok;
   3920       }
   3921       a++;
   3922    }
   3923 }
   3924 
   3925 
   3926 /*------------------------------------------------------------*/
   3927 /*--- Memory event handlers                                ---*/
   3928 /*------------------------------------------------------------*/
   3929 
   3930 static
   3931 void check_mem_is_addressable ( CorePart part, ThreadId tid, const HChar* s,
   3932                                 Addr base, SizeT size )
   3933 {
   3934    Addr bad_addr;
   3935    Bool ok = is_mem_addressable ( base, size, &bad_addr );
   3936 
   3937    if (!ok) {
   3938       switch (part) {
   3939       case Vg_CoreSysCall:
   3940          MC_(record_memparam_error) ( tid, bad_addr,
   3941                                       /*isAddrErr*/True, s, 0/*otag*/ );
   3942          break;
   3943 
   3944       case Vg_CoreSignal:
   3945          MC_(record_core_mem_error)( tid, s );
   3946          break;
   3947 
   3948       default:
   3949          VG_(tool_panic)("check_mem_is_addressable: unexpected CorePart");
   3950       }
   3951    }
   3952 }
   3953 
   3954 static
   3955 void check_mem_is_defined ( CorePart part, ThreadId tid, const HChar* s,
   3956                             Addr base, SizeT size )
   3957 {
   3958    UInt otag = 0;
   3959    Addr bad_addr;
   3960    MC_ReadResult res = is_mem_defined ( base, size, &bad_addr, &otag );
   3961 
   3962    if (MC_Ok != res) {
   3963       Bool isAddrErr = ( MC_AddrErr == res ? True : False );
   3964 
   3965       switch (part) {
   3966       case Vg_CoreSysCall:
   3967          MC_(record_memparam_error) ( tid, bad_addr, isAddrErr, s,
   3968                                       isAddrErr ? 0 : otag );
   3969          break;
   3970 
   3971       case Vg_CoreSysCallArgInMem:
   3972          MC_(record_regparam_error) ( tid, s, otag );
   3973          break;
   3974 
   3975       /* If we're being asked to jump to a silly address, record an error
   3976          message before potentially crashing the entire system. */
   3977       case Vg_CoreTranslate:
   3978          MC_(record_jump_error)( tid, bad_addr );
   3979          break;
   3980 
   3981       default:
   3982          VG_(tool_panic)("check_mem_is_defined: unexpected CorePart");
   3983       }
   3984    }
   3985 }
   3986 
   3987 static
   3988 void check_mem_is_defined_asciiz ( CorePart part, ThreadId tid,
   3989                                    const HChar* s, Addr str )
   3990 {
   3991    MC_ReadResult res;
   3992    Addr bad_addr = 0;   // shut GCC up
   3993    UInt otag = 0;
   3994 
   3995    tl_assert(part == Vg_CoreSysCall);
   3996    res = mc_is_defined_asciiz ( (Addr)str, &bad_addr, &otag );
   3997    if (MC_Ok != res) {
   3998       Bool isAddrErr = ( MC_AddrErr == res ? True : False );
   3999       MC_(record_memparam_error) ( tid, bad_addr, isAddrErr, s,
   4000                                    isAddrErr ? 0 : otag );
   4001    }
   4002 }
   4003 
   4004 /* Handling of mmap and mprotect is not as simple as it seems.
   4005 
   4006    The underlying semantics are that memory obtained from mmap is
   4007    always initialised, but may be inaccessible.  And changes to the
   4008    protection of memory do not change its contents and hence not its
   4009    definedness state.  Problem is we can't model
   4010    inaccessible-but-with-some-definedness state; once we mark memory
   4011    as inaccessible we lose all info about definedness, and so can't
   4012    restore that if it is later made accessible again.
   4013 
   4014    One obvious thing to do is this:
   4015 
   4016       mmap/mprotect NONE  -> noaccess
   4017       mmap/mprotect other -> defined
   4018 
   4019    The problem case here is: taking accessible memory, writing
   4020    uninitialised data to it, mprotecting it NONE and later mprotecting
   4021    it back to some accessible state causes the undefinedness to be
   4022    lost.
   4023 
   4024    A better proposal is:
   4025 
   4026      (1) mmap NONE       ->  make noaccess
   4027      (2) mmap other      ->  make defined
   4028 
   4029      (3) mprotect NONE   ->  # no change
   4030      (4) mprotect other  ->  change any "noaccess" to "defined"
   4031 
   4032    (2) is OK because memory newly obtained from mmap really is defined
   4033        (zeroed out by the kernel -- doing anything else would
   4034        constitute a massive security hole.)
   4035 
   4036    (1) is OK because the only way to make the memory usable is via
   4037        (4), in which case we also wind up correctly marking it all as
   4038        defined.
   4039 
   4040    (3) is the weak case.  We choose not to change memory state.
   4041        (presumably the range is in some mixture of "defined" and
   4042        "undefined", viz, accessible but with arbitrary V bits).  Doing
   4043        nothing means we retain the V bits, so that if the memory is
   4044        later mprotected "other", the V bits remain unchanged, so there
   4045        can be no false negatives.  The bad effect is that if there's
   4046        an access in the area, then MC cannot warn; but at least we'll
   4047        get a SEGV to show, so it's better than nothing.
   4048 
   4049    Consider the sequence (3) followed by (4).  Any memory that was
   4050    "defined" or "undefined" previously retains its state (as
   4051    required).  Any memory that was "noaccess" before can only have
   4052    been made that way by (1), and so it's OK to change it to
   4053    "defined".
   4054 
   4055    See https://bugs.kde.org/show_bug.cgi?id=205541
   4056    and https://bugs.kde.org/show_bug.cgi?id=210268
   4057 */
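        /* A worked example of rules (2)-(4) above, using a hypothetical buffer
           p (addresses and sizes made up purely for illustration):

              p = mmap(NULL, len, PROT_READ|PROT_WRITE, ...)
                                   -> rule (2): p[0 .. len-1] become defined
              client copies uninitialised data into p[0 .. 9]
                                   -> p[0 .. 9] become undefined
              mprotect(p, len, PROT_NONE)
                                   -> rule (3): no change; the V bits for
                                      p[0 .. 9] are retained
              mprotect(p, len, PROT_READ)
                                   -> rule (4): only bytes currently marked
                                      "noaccess" become defined, so
                                      p[0 .. 9] stay undefined, as required.
        */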
   4058 static
   4059 void mc_new_mem_mmap ( Addr a, SizeT len, Bool rr, Bool ww, Bool xx,
   4060                        ULong di_handle )
   4061 {
   4062    if (rr || ww || xx) {
   4063       /* (2) mmap/mprotect other -> defined */
   4064       MC_(make_mem_defined)(a, len);
   4065    } else {
   4066       /* (1) mmap/mprotect NONE  -> noaccess */
   4067       MC_(make_mem_noaccess)(a, len);
   4068    }
   4069 }
   4070 
   4071 static
   4072 void mc_new_mem_mprotect ( Addr a, SizeT len, Bool rr, Bool ww, Bool xx )
   4073 {
   4074    if (rr || ww || xx) {
   4075       /* (4) mprotect other  ->  change any "noaccess" to "defined" */
   4076       make_mem_defined_if_noaccess(a, len);
   4077    } else {
   4078       /* (3) mprotect NONE   ->  # no change */
   4079       /* do nothing */
   4080    }
   4081 }
   4082 
   4083 
   4084 static
   4085 void mc_new_mem_startup( Addr a, SizeT len,
   4086                          Bool rr, Bool ww, Bool xx, ULong di_handle )
   4087 {
   4088    // Because code is defined, initialised variables get put in the data
   4089    // segment and are defined, and uninitialised variables get put in the
   4090    // bss segment and are auto-zeroed (and so defined).
   4091    //
   4092    // It's possible that there will be padding between global variables.
   4093    // This will also be auto-zeroed, and marked as defined by Memcheck.  If
   4094    // a program uses it, Memcheck will not complain.  This is arguably a
   4095    // false negative, but it's a grey area -- the behaviour is defined (the
   4096    // padding is zeroed) but it's probably not what the user intended.  And
   4097    // we can't avoid it.
   4098    //
   4099    // Note: we generally ignore RWX permissions, because we can't track them
   4100    // without requiring more than one A bit which would slow things down a
   4101    // lot.  But on Darwin the 0th page is mapped but !R and !W and !X.
   4102    // So we mark any such pages as "unaddressable".
   4103    DEBUG("mc_new_mem_startup(%#lx, %llu, rr=%u, ww=%u, xx=%u)\n",
   4104          a, (ULong)len, rr, ww, xx);
   4105    mc_new_mem_mmap(a, len, rr, ww, xx, di_handle);
   4106 }
   4107 
   4108 static
   4109 void mc_post_mem_write(CorePart part, ThreadId tid, Addr a, SizeT len)
   4110 {
   4111    MC_(make_mem_defined)(a, len);
   4112 }
   4113 
   4114 
   4115 /*------------------------------------------------------------*/
   4116 /*--- Register event handlers                              ---*/
   4117 /*------------------------------------------------------------*/
   4118 
   4119 /* Try and get a nonzero origin for the guest state section of thread
   4120    tid characterised by (offset,size).  Return 0 if nothing to show
   4121    for it. */
   4122 static UInt mb_get_origin_for_guest_offset ( ThreadId tid,
   4123                                              Int offset, SizeT size )
   4124 {
   4125    Int   sh2off;
   4126    UInt  area[3];
   4127    UInt  otag;
   4128    sh2off = MC_(get_otrack_shadow_offset)( offset, size );
   4129    if (sh2off == -1)
   4130       return 0;  /* This piece of guest state is not tracked */
   4131    tl_assert(sh2off >= 0);
   4132    tl_assert(0 == (sh2off % 4));
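           /* area[1] is the word we actually want; area[0] and area[2] are
              sentinel values.  The asserts after the call check that
              VG_(get_shadow_regs_area) wrote exactly the 4 bytes of area[1]
              and left the sentinels untouched. */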
   4133    area[0] = 0x31313131;
   4134    area[2] = 0x27272727;
   4135    VG_(get_shadow_regs_area)( tid, (UChar *)&area[1], 2/*shadowno*/,sh2off,4 );
   4136    tl_assert(area[0] == 0x31313131);
   4137    tl_assert(area[2] == 0x27272727);
   4138    otag = area[1];
   4139    return otag;
   4140 }
   4141 
   4142 
   4143 /* When some chunk of guest state is written, mark the corresponding
   4144    shadow area as valid.  This is used to initialise arbitrarily large
   4145    chunks of guest state, hence the _SIZE value, which has to be as
   4146    big as the biggest guest state.
   4147 */
   4148 static void mc_post_reg_write ( CorePart part, ThreadId tid,
   4149                                 PtrdiffT offset, SizeT size)
   4150 {
   4151 #  define MAX_REG_WRITE_SIZE 1696
   4152    UChar area[MAX_REG_WRITE_SIZE];
   4153    tl_assert(size <= MAX_REG_WRITE_SIZE);
   4154    VG_(memset)(area, V_BITS8_DEFINED, size);
   4155    VG_(set_shadow_regs_area)( tid, 1/*shadowNo*/,offset,size, area );
   4156 #  undef MAX_REG_WRITE_SIZE
   4157 }
   4158 
   4159 static
   4160 void mc_post_reg_write_clientcall ( ThreadId tid,
   4161                                     PtrdiffT offset, SizeT size, Addr f)
   4162 {
   4163    mc_post_reg_write(/*dummy*/0, tid, offset, size);
   4164 }
   4165 
   4166 /* Look at the definedness of the guest's shadow state for
   4167    [offset, offset+size).  If any part of that is undefined, record
   4168    a parameter error.
   4169 */
   4170 static void mc_pre_reg_read ( CorePart part, ThreadId tid, const HChar* s,
   4171                               PtrdiffT offset, SizeT size)
   4172 {
   4173    Int   i;
   4174    Bool  bad;
   4175    UInt  otag;
   4176 
   4177    UChar area[16];
   4178    tl_assert(size <= 16);
   4179 
   4180    VG_(get_shadow_regs_area)( tid, area, 1/*shadowNo*/,offset,size );
   4181 
   4182    bad = False;
   4183    for (i = 0; i < size; i++) {
   4184       if (area[i] != V_BITS8_DEFINED) {
   4185          bad = True;
   4186          break;
   4187       }
   4188    }
   4189 
   4190    if (!bad)
   4191       return;
   4192 
   4193    /* We've found some undefinedness.  See if we can also find an
   4194       origin for it. */
   4195    otag = mb_get_origin_for_guest_offset( tid, offset, size );
   4196    MC_(record_regparam_error) ( tid, s, otag );
   4197 }
   4198 
   4199 
   4200 /*------------------------------------------------------------*/
   4201 /*--- Functions called directly from generated code:       ---*/
   4202 /*--- Load/store handlers.                                 ---*/
   4203 /*------------------------------------------------------------*/
   4204 
   4205 /* Types:  LOADV32, LOADV16, LOADV8 are:
   4206                UWord fn ( Addr a )
   4207    so they return 32-bits on 32-bit machines and 64-bits on
   4208    64-bit machines.  Addr has the same size as a host word.
   4209 
   4210    LOADV64 is always  ULong fn ( Addr a )
   4211 
   4212    Similarly for STOREV8, STOREV16, STOREV32, the supplied vbits
   4213    are a UWord, and for STOREV64 they are a ULong.
   4214 */
   4215 
   4216 /* If any part of '_a' indicated by the mask is 1, either '_a' is not
   4217    naturally '_sz/8'-aligned, or it exceeds the range covered by the
   4218    primary map.  This is all very tricky (and important!), so let's
   4219    work through the maths by hand (below), *and* assert for these
   4220    values at startup. */
   4221 #define MASK(_szInBytes) \
   4222    ( ~((0x10000UL-(_szInBytes)) | ((N_PRIMARY_MAP-1) << 16)) )
   4223 
   4224 /* MASK only exists so as to define this macro. */
   4225 #define UNALIGNED_OR_HIGH(_a,_szInBits) \
   4226    ((_a) & MASK((_szInBits>>3)))
   4227 
   4228 /* On a 32-bit machine:
   4229 
   4230    N_PRIMARY_BITS          == 16, so
   4231    N_PRIMARY_MAP           == 0x10000, so
   4232    N_PRIMARY_MAP-1         == 0xFFFF, so
   4233    (N_PRIMARY_MAP-1) << 16 == 0xFFFF0000, and so
   4234 
   4235    MASK(1) = ~ ( (0x10000 - 1) | 0xFFFF0000 )
   4236            = ~ ( 0xFFFF | 0xFFFF0000 )
   4237            = ~ 0xFFFF'FFFF
   4238            = 0
   4239 
   4240    MASK(2) = ~ ( (0x10000 - 2) | 0xFFFF0000 )
   4241            = ~ ( 0xFFFE | 0xFFFF0000 )
   4242            = ~ 0xFFFF'FFFE
   4243            = 1
   4244 
   4245    MASK(4) = ~ ( (0x10000 - 4) | 0xFFFF0000 )
   4246            = ~ ( 0xFFFC | 0xFFFF0000 )
   4247            = ~ 0xFFFF'FFFC
   4248            = 3
   4249 
   4250    MASK(8) = ~ ( (0x10000 - 8) | 0xFFFF0000 )
   4251            = ~ ( 0xFFF8 | 0xFFFF0000 )
   4252            = ~ 0xFFFF'FFF8
   4253            = 7
   4254 
   4255    Hence in the 32-bit case, "a & MASK(1/2/4/8)" is a nonzero value
   4256    precisely when a is not 1/2/4/8-bytes aligned.  And obviously, for
   4257    the 1-byte alignment case, it is always a zero value, since MASK(1)
   4258    is zero.  All as expected.
   4259 
   4260    On a 64-bit machine, it's more complex, since we're testing
   4261    simultaneously for misalignment and for the address being at or
   4262    above 64G:
   4263 
   4264    N_PRIMARY_BITS          == 20, so
   4265    N_PRIMARY_MAP           == 0x100000, so
   4266    N_PRIMARY_MAP-1         == 0xFFFFF, so
   4267    (N_PRIMARY_MAP-1) << 16 == 0xF'FFFF'0000, and so
   4268 
   4269    MASK(1) = ~ ( (0x10000 - 1) | 0xF'FFFF'0000 )
   4270            = ~ ( 0xFFFF | 0xF'FFFF'0000 )
   4271            = ~ 0xF'FFFF'FFFF
   4272            = 0xFFFF'FFF0'0000'0000
   4273 
   4274    MASK(2) = ~ ( (0x10000 - 2) | 0xF'FFFF'0000 )
   4275            = ~ ( 0xFFFE | 0xF'FFFF'0000 )
   4276            = ~ 0xF'FFFF'FFFE
   4277            = 0xFFFF'FFF0'0000'0001
   4278 
   4279    MASK(4) = ~ ( (0x10000 - 4) | 0xF'FFFF'0000 )
   4280            = ~ ( 0xFFFC | 0xF'FFFF'0000 )
   4281            = ~ 0xF'FFFF'FFFC
   4282            = 0xFFFF'FFF0'0000'0003
   4283 
   4284    MASK(8) = ~ ( (0x10000 - 8) | 0xF'FFFF'0000 )
   4285            = ~ ( 0xFFF8 | 0xF'FFFF'0000 )
   4286            = ~ 0xF'FFFF'FFF8
   4287            = 0xFFFF'FFF0'0000'0007
   4288 */
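
        /* A minimal standalone sketch (not part of the build) that re-checks
           the 64-bit arithmetic above.  It assumes an LP64 host and hard-codes
           N_PRIMARY_MAP as 0x100000 to match the figures used in this comment;
           compile it as its own program to try it.

           #include <assert.h>
           #include <stdio.h>

           #define N_PRIMARY_MAP  0x100000UL
           #define MASK(_szInBytes) \
              ( ~((0x10000UL-(_szInBytes)) | ((N_PRIMARY_MAP-1) << 16)) )

           int main ( void )
           {
              // Values worked out by hand above.
              assert(MASK(1) == 0xFFFFFFF000000000UL);
              assert(MASK(2) == 0xFFFFFFF000000001UL);
              assert(MASK(4) == 0xFFFFFFF000000003UL);
              assert(MASK(8) == 0xFFFFFFF000000007UL);
              // An 8-aligned, in-range address passes; a misaligned one fails.
              assert((0x12345678UL & MASK(8)) == 0);
              assert((0x12345679UL & MASK(8)) != 0);
              printf("MASK arithmetic agrees with the hand calculation\n");
              return 0;
           }
        */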
   4289 
   4290 
   4291 /* ------------------------ Size = 32 or 16 ------------------ */
   4292 
   4293 static INLINE
   4294 void mc_LOADV_128_or_256 ( /*OUT*/ULong* res,
   4295                            Addr a, SizeT nBits, Bool isBigEndian )
   4296 {
   4297    PROF_EVENT(200, "mc_LOADV_128_or_256");
   4298 
   4299 #ifndef PERF_FAST_LOADV
   4300    mc_LOADV_128_or_256_slow( res, a, nBits, isBigEndian );
   4301    return;
   4302 #else
   4303    {
   4304       UWord   sm_off16, vabits16, j;
   4305       UWord   nBytes  = nBits / 8;
   4306       UWord   nULongs = nBytes / 8;
   4307       SecMap* sm;
   4308 
   4309       if (UNLIKELY( UNALIGNED_OR_HIGH(a,nBits) )) {
   4310          PROF_EVENT(201, "mc_LOADV_128_or_256-slow1");
   4311          mc_LOADV_128_or_256_slow( res, a, nBits, isBigEndian );
   4312          return;
   4313       }
   4314 
   4315       /* Handle common cases quickly: a (and a+8 and a+16 etc.) is
   4316          suitably aligned, is mapped, and addressible. */
   4317       for (j = 0; j < nULongs; j++) {
   4318          sm       = get_secmap_for_reading_low(a + 8*j);
   4319          sm_off16 = SM_OFF_16(a + 8*j);
   4320          vabits16 = ((UShort*)(sm->vabits8))[sm_off16];
   4321 
   4322          // Convert V bits from compact memory form to expanded
   4323          // register form.
   4324          if (LIKELY(vabits16 == VA_BITS16_DEFINED)) {
   4325             res[j] = V_BITS64_DEFINED;
   4326          } else if (LIKELY(vabits16 == VA_BITS16_UNDEFINED)) {
   4327             res[j] = V_BITS64_UNDEFINED;
   4328          } else {
   4329             /* Slow case: some block of 8 bytes are not all-defined or
   4330                all-undefined. */
   4331             PROF_EVENT(202, "mc_LOADV_128_or_256-slow2");
   4332             mc_LOADV_128_or_256_slow( res, a, nBits, isBigEndian );
   4333             return;
   4334          }
   4335       }
   4336       return;
   4337    }
   4338 #endif
   4339 }
   4340 
   4341 VG_REGPARM(2) void MC_(helperc_LOADV256be) ( /*OUT*/V256* res, Addr a )
   4342 {
   4343    mc_LOADV_128_or_256(&res->w64[0], a, 256, True);
   4344 }
   4345 VG_REGPARM(2) void MC_(helperc_LOADV256le) ( /*OUT*/V256* res, Addr a )
   4346 {
   4347    mc_LOADV_128_or_256(&res->w64[0], a, 256, False);
   4348 }
   4349 
   4350 VG_REGPARM(2) void MC_(helperc_LOADV128be) ( /*OUT*/V128* res, Addr a )
   4351 {
   4352    mc_LOADV_128_or_256(&res->w64[0], a, 128, True);
   4353 }
   4354 VG_REGPARM(2) void MC_(helperc_LOADV128le) ( /*OUT*/V128* res, Addr a )
   4355 {
   4356    mc_LOADV_128_or_256(&res->w64[0], a, 128, False);
   4357 }
   4358 
   4359 /* ------------------------ Size = 8 ------------------------ */
   4360 
   4361 static INLINE
   4362 ULong mc_LOADV64 ( Addr a, Bool isBigEndian )
   4363 {
   4364    PROF_EVENT(200, "mc_LOADV64");
   4365 
   4366 #ifndef PERF_FAST_LOADV
   4367    return mc_LOADVn_slow( a, 64, isBigEndian );
   4368 #else
   4369    {
   4370       UWord   sm_off16, vabits16;
   4371       SecMap* sm;
   4372 
   4373       if (UNLIKELY( UNALIGNED_OR_HIGH(a,64) )) {
   4374          PROF_EVENT(201, "mc_LOADV64-slow1");
   4375          return (ULong)mc_LOADVn_slow( a, 64, isBigEndian );
   4376       }
   4377 
   4378       sm       = get_secmap_for_reading_low(a);
   4379       sm_off16 = SM_OFF_16(a);
   4380       vabits16 = ((UShort*)(sm->vabits8))[sm_off16];
   4381 
   4382       // Handle common case quickly: a is suitably aligned, is mapped, and
   4383       // addressible.
   4384       // Convert V bits from compact memory form to expanded register form.
   4385       if (LIKELY(vabits16 == VA_BITS16_DEFINED)) {
   4386          return V_BITS64_DEFINED;
   4387       } else if (LIKELY(vabits16 == VA_BITS16_UNDEFINED)) {
   4388          return V_BITS64_UNDEFINED;
   4389       } else {
   4390          /* Slow case: the 8 bytes are not all-defined or all-undefined. */
   4391          PROF_EVENT(202, "mc_LOADV64-slow2");
   4392          return mc_LOADVn_slow( a, 64, isBigEndian );
   4393       }
   4394    }
   4395 #endif
   4396 }
   4397 
   4398 VG_REGPARM(1) ULong MC_(helperc_LOADV64be) ( Addr a )
   4399 {
   4400    return mc_LOADV64(a, True);
   4401 }
   4402 VG_REGPARM(1) ULong MC_(helperc_LOADV64le) ( Addr a )
   4403 {
   4404    return mc_LOADV64(a, False);
   4405 }
   4406 
   4407 
   4408 static INLINE
   4409 void mc_STOREV64 ( Addr a, ULong vbits64, Bool isBigEndian )
   4410 {
   4411    PROF_EVENT(210, "mc_STOREV64");
   4412 
   4413 #ifndef PERF_FAST_STOREV
   4414    // XXX: this slow case seems to be marginally faster than the fast case!
   4415    // Investigate further.
   4416    mc_STOREVn_slow( a, 64, vbits64, isBigEndian );
   4417 #else
   4418    {
   4419       UWord   sm_off16, vabits16;
   4420       SecMap* sm;
   4421 
   4422       if (UNLIKELY( UNALIGNED_OR_HIGH(a,64) )) {
   4423          PROF_EVENT(211, "mc_STOREV64-slow1");
   4424          mc_STOREVn_slow( a, 64, vbits64, isBigEndian );
   4425          return;
   4426       }
   4427 
   4428       sm       = get_secmap_for_reading_low(a);
   4429       sm_off16 = SM_OFF_16(a);
   4430       vabits16 = ((UShort*)(sm->vabits8))[sm_off16];
   4431 
   4432       // To understand the below cleverness, see the extensive comments
   4433       // in MC_(helperc_STOREV8).
   4434       if (LIKELY(V_BITS64_DEFINED == vbits64)) {
   4435          if (LIKELY(vabits16 == (UShort)VA_BITS16_DEFINED)) {
   4436             return;
   4437          }
   4438          if (!is_distinguished_sm(sm) && VA_BITS16_UNDEFINED == vabits16) {
   4439             ((UShort*)(sm->vabits8))[sm_off16] = (UShort)VA_BITS16_DEFINED;
   4440             return;
   4441          }
   4442          PROF_EVENT(232, "mc_STOREV64-slow2");
   4443          mc_STOREVn_slow( a, 64, vbits64, isBigEndian );
   4444          return;
   4445       }
   4446       if (V_BITS64_UNDEFINED == vbits64) {
   4447          if (vabits16 == (UShort)VA_BITS16_UNDEFINED) {
   4448             return;
   4449          }
   4450          if (!is_distinguished_sm(sm) && VA_BITS16_DEFINED == vabits16) {
   4451             ((UShort*)(sm->vabits8))[sm_off16] = (UShort)VA_BITS16_UNDEFINED;
   4452             return;
   4453          }
   4454          PROF_EVENT(232, "mc_STOREV64-slow3");
   4455          mc_STOREVn_slow( a, 64, vbits64, isBigEndian );
   4456          return;
   4457       }
   4458 
   4459       PROF_EVENT(212, "mc_STOREV64-slow4");
   4460       mc_STOREVn_slow( a, 64, vbits64, isBigEndian );
   4461    }
   4462 #endif
   4463 }
   4464 
   4465 VG_REGPARM(1) void MC_(helperc_STOREV64be) ( Addr a, ULong vbits64 )
   4466 {
   4467    mc_STOREV64(a, vbits64, True);
   4468 }
   4469 VG_REGPARM(1) void MC_(helperc_STOREV64le) ( Addr a, ULong vbits64 )
   4470 {
   4471    mc_STOREV64(a, vbits64, False);
   4472 }
   4473 
   4474 
   4475 /* ------------------------ Size = 4 ------------------------ */
   4476 
   4477 static INLINE
   4478 UWord mc_LOADV32 ( Addr a, Bool isBigEndian )
   4479 {
   4480    PROF_EVENT(220, "mc_LOADV32");
   4481 
   4482 #ifndef PERF_FAST_LOADV
   4483    return (UWord)mc_LOADVn_slow( a, 32, isBigEndian );
   4484 #else
   4485    {
   4486       UWord   sm_off, vabits8;
   4487       SecMap* sm;
   4488 
   4489       if (UNLIKELY( UNALIGNED_OR_HIGH(a,32) )) {
   4490          PROF_EVENT(221, "mc_LOADV32-slow1");
   4491          return (UWord)mc_LOADVn_slow( a, 32, isBigEndian );
   4492       }
   4493 
   4494       sm      = get_secmap_for_reading_low(a);
   4495       sm_off  = SM_OFF(a);
   4496       vabits8 = sm->vabits8[sm_off];
   4497 
   4498       // Handle common case quickly: a is suitably aligned, is mapped, and the
   4499       // entire word32 it lives in is addressible.
   4500       // Convert V bits from compact memory form to expanded register form.
   4501       // For 64-bit platforms, set the high 32 bits of retval to 1 (undefined).
   4502       // Almost certainly not necessary, but be paranoid.
   4503       if (LIKELY(vabits8 == VA_BITS8_DEFINED)) {
   4504          return ((UWord)0xFFFFFFFF00000000ULL | (UWord)V_BITS32_DEFINED);
   4505       } else if (LIKELY(vabits8 == VA_BITS8_UNDEFINED)) {
   4506          return ((UWord)0xFFFFFFFF00000000ULL | (UWord)V_BITS32_UNDEFINED);
   4507       } else {
   4508          /* Slow case: the 4 bytes are not all-defined or all-undefined. */
   4509          PROF_EVENT(222, "mc_LOADV32-slow2");
   4510          return (UWord)mc_LOADVn_slow( a, 32, isBigEndian );
   4511       }
   4512    }
   4513 #endif
   4514 }
   4515 
   4516 VG_REGPARM(1) UWord MC_(helperc_LOADV32be) ( Addr a )
   4517 {
   4518    return mc_LOADV32(a, True);
   4519 }
   4520 VG_REGPARM(1) UWord MC_(helperc_LOADV32le) ( Addr a )
   4521 {
   4522    return mc_LOADV32(a, False);
   4523 }
   4524 
   4525 
   4526 static INLINE
   4527 void mc_STOREV32 ( Addr a, UWord vbits32, Bool isBigEndian )
   4528 {
   4529    PROF_EVENT(230, "mc_STOREV32");
   4530 
   4531 #ifndef PERF_FAST_STOREV
   4532    mc_STOREVn_slow( a, 32, (ULong)vbits32, isBigEndian );
   4533 #else
   4534    {
   4535       UWord   sm_off, vabits8;
   4536       SecMap* sm;
   4537 
   4538       if (UNLIKELY( UNALIGNED_OR_HIGH(a,32) )) {
   4539          PROF_EVENT(231, "mc_STOREV32-slow1");
   4540          mc_STOREVn_slow( a, 32, (ULong)vbits32, isBigEndian );
   4541          return;
   4542       }
   4543 
   4544       sm      = get_secmap_for_reading_low(a);
   4545       sm_off  = SM_OFF(a);
   4546       vabits8 = sm->vabits8[sm_off];
   4547 
   4548       // To understand the below cleverness, see the extensive comments
   4549       // in MC_(helperc_STOREV8).
   4550       if (LIKELY(V_BITS32_DEFINED == vbits32)) {
   4551          if (LIKELY(vabits8 == (UInt)VA_BITS8_DEFINED)) {
   4552             return;
   4553          }
   4554          if (!is_distinguished_sm(sm)  && VA_BITS8_UNDEFINED == vabits8) {
   4555             sm->vabits8[sm_off] = (UInt)VA_BITS8_DEFINED;
   4556             return;
   4557          }
   4558          PROF_EVENT(232, "mc_STOREV32-slow2");
   4559          mc_STOREVn_slow( a, 32, (ULong)vbits32, isBigEndian );
   4560          return;
   4561       }
   4562       if (V_BITS32_UNDEFINED == vbits32) {
   4563          if (vabits8 == (UInt)VA_BITS8_UNDEFINED) {
   4564             return;
   4565          }
   4566          if (!is_distinguished_sm(sm) && VA_BITS8_DEFINED == vabits8) {
   4567             sm->vabits8[sm_off] = (UInt)VA_BITS8_UNDEFINED;
   4568             return;
   4569          }
   4570          PROF_EVENT(233, "mc_STOREV32-slow3");
   4571          mc_STOREVn_slow( a, 32, (ULong)vbits32, isBigEndian );
   4572          return;
   4573       }
   4574 
   4575       PROF_EVENT(234, "mc_STOREV32-slow4");
   4576       mc_STOREVn_slow( a, 32, (ULong)vbits32, isBigEndian );
   4577    }
   4578 #endif
   4579 }
   4580 
   4581 VG_REGPARM(2) void MC_(helperc_STOREV32be) ( Addr a, UWord vbits32 )
   4582 {
   4583    mc_STOREV32(a, vbits32, True);
   4584 }
   4585 VG_REGPARM(2) void MC_(helperc_STOREV32le) ( Addr a, UWord vbits32 )
   4586 {
   4587    mc_STOREV32(a, vbits32, False);
   4588 }
   4589 
   4590 
   4591 /* ------------------------ Size = 2 ------------------------ */
   4592 
   4593 static INLINE
   4594 UWord mc_LOADV16 ( Addr a, Bool isBigEndian )
   4595 {
   4596    PROF_EVENT(240, "mc_LOADV16");
   4597 
   4598 #ifndef PERF_FAST_LOADV
   4599    return (UWord)mc_LOADVn_slow( a, 16, isBigEndian );
   4600 #else
   4601    {
   4602       UWord   sm_off, vabits8;
   4603       SecMap* sm;
   4604 
   4605       if (UNLIKELY( UNALIGNED_OR_HIGH(a,16) )) {
   4606          PROF_EVENT(241, "mc_LOADV16-slow1");
   4607          return (UWord)mc_LOADVn_slow( a, 16, isBigEndian );
   4608       }
   4609 
   4610       sm      = get_secmap_for_reading_low(a);
   4611       sm_off  = SM_OFF(a);
   4612       vabits8 = sm->vabits8[sm_off];
   4613       // Handle common case quickly: a is suitably aligned, is mapped, and is
   4614       // addressible.
   4615       // Convert V bits from compact memory form to expanded register form
   4616       if      (LIKELY(vabits8 == VA_BITS8_DEFINED  )) { return V_BITS16_DEFINED;   }
   4617       else if (LIKELY(vabits8 == VA_BITS8_UNDEFINED)) { return V_BITS16_UNDEFINED; }
   4618       else {
   4619          // The 4 (yes, 4) bytes are not all-defined or all-undefined, check
   4620          // the two sub-bytes.
   4621          UChar vabits4 = extract_vabits4_from_vabits8(a, vabits8);
   4622          if      (vabits4 == VA_BITS4_DEFINED  ) { return V_BITS16_DEFINED;   }
   4623          else if (vabits4 == VA_BITS4_UNDEFINED) { return V_BITS16_UNDEFINED; }
   4624          else {
   4625             /* Slow case: the two bytes are not all-defined or all-undefined. */
   4626             PROF_EVENT(242, "mc_LOADV16-slow2");
   4627             return (UWord)mc_LOADVn_slow( a, 16, isBigEndian );
   4628          }
   4629       }
   4630    }
   4631 #endif
   4632 }
   4633 
   4634 VG_REGPARM(1) UWord MC_(helperc_LOADV16be) ( Addr a )
   4635 {
   4636    return mc_LOADV16(a, True);
   4637 }
   4638 VG_REGPARM(1) UWord MC_(helperc_LOADV16le) ( Addr a )
   4639 {
   4640    return mc_LOADV16(a, False);
   4641 }
   4642 
   4643 /* True if the vabits4 in vabits8 indicate a and a+1 are accessible. */
   4644 static INLINE
   4645 Bool accessible_vabits4_in_vabits8 ( Addr a, UChar vabits8 )
   4646 {
   4647    UInt shift;
   4648    tl_assert(VG_IS_2_ALIGNED(a));      // Must be 2-aligned
   4649    shift = (a & 2) << 1;               // shift by 0 or 4
   4650    vabits8 >>= shift;                  // shift the four bits to the bottom
   4651     // check 2 x vabits2 != VA_BITS2_NOACCESS
   4652    return ((0x3 & vabits8) != VA_BITS2_NOACCESS)
   4653       &&  ((0xc & vabits8) != VA_BITS2_NOACCESS << 2);
   4654 }
   4655 
   4656 static INLINE
   4657 void mc_STOREV16 ( Addr a, UWord vbits16, Bool isBigEndian )
   4658 {
   4659    PROF_EVENT(250, "mc_STOREV16");
   4660 
   4661 #ifndef PERF_FAST_STOREV
   4662    mc_STOREVn_slow( a, 16, (ULong)vbits16, isBigEndian );
   4663 #else
   4664    {
   4665       UWord   sm_off, vabits8;
   4666       SecMap* sm;
   4667 
   4668       if (UNLIKELY( UNALIGNED_OR_HIGH(a,16) )) {
   4669          PROF_EVENT(251, "mc_STOREV16-slow1");
   4670          mc_STOREVn_slow( a, 16, (ULong)vbits16, isBigEndian );
   4671          return;
   4672       }
   4673 
   4674       sm      = get_secmap_for_reading_low(a);
   4675       sm_off  = SM_OFF(a);
   4676       vabits8 = sm->vabits8[sm_off];
   4677 
   4678       // To understand the below cleverness, see the extensive comments
   4679       // in MC_(helperc_STOREV8).
   4680       if (LIKELY(V_BITS16_DEFINED == vbits16)) {
   4681          if (LIKELY(vabits8 == VA_BITS8_DEFINED)) {
   4682             return;
   4683          }
   4684          if (!is_distinguished_sm(sm)
   4685              && accessible_vabits4_in_vabits8(a, vabits8)) {
   4686             insert_vabits4_into_vabits8( a, VA_BITS4_DEFINED,
   4687                                          &(sm->vabits8[sm_off]) );
   4688             return;
   4689          }
   4690          PROF_EVENT(232, "mc_STOREV16-slow2");
   4691          mc_STOREVn_slow( a, 16, (ULong)vbits16, isBigEndian );
                 return;
   4692       }
   4693       if (V_BITS16_UNDEFINED == vbits16) {
   4694          if (vabits8 == VA_BITS8_UNDEFINED) {
   4695             return;
   4696          }
   4697          if (!is_distinguished_sm(sm)
   4698              && accessible_vabits4_in_vabits8(a, vabits8)) {
   4699             insert_vabits4_into_vabits8( a, VA_BITS4_UNDEFINED,
   4700                                          &(sm->vabits8[sm_off]) );
   4701             return;
   4702          }
   4703          PROF_EVENT(233, "mc_STOREV16-slow3");
   4704          mc_STOREVn_slow( a, 16, (ULong)vbits16, isBigEndian );
   4705          return;
   4706       }
   4707 
   4708       PROF_EVENT(234, "mc_STOREV16-slow4");
   4709       mc_STOREVn_slow( a, 16, (ULong)vbits16, isBigEndian );
   4710    }
   4711 #endif
   4712 }
   4713 
   4714 VG_REGPARM(2) void MC_(helperc_STOREV16be) ( Addr a, UWord vbits16 )
   4715 {
   4716    mc_STOREV16(a, vbits16, True);
   4717 }
   4718 VG_REGPARM(2) void MC_(helperc_STOREV16le) ( Addr a, UWord vbits16 )
   4719 {
   4720    mc_STOREV16(a, vbits16, False);
   4721 }
   4722 
   4723 
   4724 /* ------------------------ Size = 1 ------------------------ */
   4725 /* Note: endianness is irrelevant for size == 1 */
   4726 
   4727 VG_REGPARM(1)
   4728 UWord MC_(helperc_LOADV8) ( Addr a )
   4729 {
   4730    PROF_EVENT(260, "mc_LOADV8");
   4731 
   4732 #ifndef PERF_FAST_LOADV
   4733    return (UWord)mc_LOADVn_slow( a, 8, False/*irrelevant*/ );
   4734 #else
   4735    {
   4736       UWord   sm_off, vabits8;
   4737       SecMap* sm;
   4738 
   4739       if (UNLIKELY( UNALIGNED_OR_HIGH(a,8) )) {
   4740          PROF_EVENT(261, "mc_LOADV8-slow1");
   4741          return (UWord)mc_LOADVn_slow( a, 8, False/*irrelevant*/ );
   4742       }
   4743 
   4744       sm      = get_secmap_for_reading_low(a);
   4745       sm_off  = SM_OFF(a);
   4746       vabits8 = sm->vabits8[sm_off];
   4747       // Convert V bits from compact memory form to expanded register form
   4748       // Handle common case quickly: a is mapped, and the entire
   4749       // word32 it lives in is addressible.
   4750       if      (LIKELY(vabits8 == VA_BITS8_DEFINED  )) { return V_BITS8_DEFINED;   }
   4751       else if (LIKELY(vabits8 == VA_BITS8_UNDEFINED)) { return V_BITS8_UNDEFINED; }
   4752       else {
   4753          // The 4 (yes, 4) bytes are not all-defined or all-undefined, check
   4754          // the single byte.
   4755          UChar vabits2 = extract_vabits2_from_vabits8(a, vabits8);
   4756          if      (vabits2 == VA_BITS2_DEFINED  ) { return V_BITS8_DEFINED;   }
   4757          else if (vabits2 == VA_BITS2_UNDEFINED) { return V_BITS8_UNDEFINED; }
   4758          else {
   4759             /* Slow case: the byte is not all-defined or all-undefined. */
   4760             PROF_EVENT(262, "mc_LOADV8-slow2");
   4761             return (UWord)mc_LOADVn_slow( a, 8, False/*irrelevant*/ );
   4762          }
   4763       }
   4764    }
   4765 #endif
   4766 }
   4767 
   4768 
   4769 VG_REGPARM(2)
   4770 void MC_(helperc_STOREV8) ( Addr a, UWord vbits8 )
   4771 {
   4772    PROF_EVENT(270, "mc_STOREV8");
   4773 
   4774 #ifndef PERF_FAST_STOREV
   4775    mc_STOREVn_slow( a, 8, (ULong)vbits8, False/*irrelevant*/ );
   4776 #else
   4777    {
   4778       UWord   sm_off, vabits8;
   4779       SecMap* sm;
   4780 
   4781       if (UNLIKELY( UNALIGNED_OR_HIGH(a,8) )) {
   4782          PROF_EVENT(271, "mc_STOREV8-slow1");
   4783          mc_STOREVn_slow( a, 8, (ULong)vbits8, False/*irrelevant*/ );
   4784          return;
   4785       }
   4786 
   4787       sm      = get_secmap_for_reading_low(a);
   4788       sm_off  = SM_OFF(a);
   4789       vabits8 = sm->vabits8[sm_off];
   4790 
   4791       // Clevernesses to speed up storing V bits.
   4792       // The 64/32/16 bit cases also have similar clevernesses, but it
   4793       // works a little differently to the code below.
   4794       //
   4795       // Cleverness 1:  sometimes we don't have to write the shadow memory at
   4796       // all, if we can tell that what we want to write is the same as what is
   4797       // already there. These cases are marked below as "defined on defined" and
   4798       // "undefined on undefined".
   4799       //
   4800       // Cleverness 2:
   4801       // We also avoid calling mc_STOREVn_slow if the V bits can directly
   4802       // be written in the secondary map. V bits can be directly written
   4803       // if 4 conditions are respected:
   4804       //   * The address for which V bits are written is naturally aligned
   4805       //        on 1 byte  for STOREV8 (this is always true)
   4806       //        on 2 bytes for STOREV16
   4807       //        on 4 bytes for STOREV32
   4808       //        on 8 bytes for STOREV64.
   4809       //   * V bits being written are either fully defined or fully undefined.
   4810       //     (for partially defined V bits, V bits cannot be directly written,
   4811       //      as the secondary vbits table must be maintained).
   4812       //   * the secmap is not distinguished (distinguished maps cannot be
   4813       //     modified).
   4814       //   * the memory corresponding to the V bits being written is
   4815       //     accessible (if one or more bytes are not accessible,
   4816       //     we must call mc_STOREVn_slow in order to report accessibility
   4817       //     errors).
   4818       //     Note that for STOREV32 and STOREV64, it is too expensive
   4819       //     to verify the accessibility of each byte for the benefit it
   4820       //     brings. Instead, a quicker check is done by comparing to
   4821       //     VA_BITS(8|16)_(UN)DEFINED. This guarantees accessibility,
   4822       //     but misses some opportunities for direct modification.
   4823       //     Per-byte accessibility checking was tried for STOREV32 and
   4824       //     measured on the perf tests; it slowed all of them down.
   4825       // The cases corresponding to cleverness 2 are marked below as
   4826       // "direct mod".
   4827       if (LIKELY(V_BITS8_DEFINED == vbits8)) {
   4828          if (LIKELY(vabits8 == VA_BITS8_DEFINED)) {
   4829             return; // defined on defined
   4830          }
   4831          if (!is_distinguished_sm(sm)
   4832              && VA_BITS2_NOACCESS != extract_vabits2_from_vabits8(a, vabits8)) {
   4833             // direct mod
   4834             insert_vabits2_into_vabits8( a, VA_BITS2_DEFINED,
   4835                                          &(sm->vabits8[sm_off]) );
   4836             return;
   4837          }
   4838          PROF_EVENT(232, "mc_STOREV8-slow2");
   4839          mc_STOREVn_slow( a, 8, (ULong)vbits8, False/*irrelevant*/ );
   4840          return;
   4841       }
   4842       if (V_BITS8_UNDEFINED == vbits8) {
   4843          if (vabits8 == VA_BITS8_UNDEFINED) {
   4844             return; // undefined on undefined
   4845          }
   4846          if (!is_distinguished_sm(sm)
   4847              && (VA_BITS2_NOACCESS
   4848                  != extract_vabits2_from_vabits8(a, vabits8))) {
   4849             // direct mod
   4850             insert_vabits2_into_vabits8( a, VA_BITS2_UNDEFINED,
   4851                                          &(sm->vabits8[sm_off]) );
   4852             return;
   4853          }
   4854          PROF_EVENT(233, "mc_STOREV8-slow3");
   4855          mc_STOREVn_slow( a, 8, (ULong)vbits8, False/*irrelevant*/ );
   4856          return;
   4857       }
   4858 
   4859       // Partially defined word
   4860       PROF_EVENT(234, "mc_STOREV8-slow4");
   4861       mc_STOREVn_slow( a, 8, (ULong)vbits8, False/*irrelevant*/ );
   4862    }
   4863 #endif
   4864 }
   4865 
   4866 
   4867 /*------------------------------------------------------------*/
   4868 /*--- Functions called directly from generated code:       ---*/
   4869 /*--- Value-check failure handlers.                        ---*/
   4870 /*------------------------------------------------------------*/
   4871 
   4872 /* Call these ones when an origin is available ... */
   4873 VG_REGPARM(1)
   4874 void MC_(helperc_value_check0_fail_w_o) ( UWord origin ) {
   4875    MC_(record_cond_error) ( VG_(get_running_tid)(), (UInt)origin );
   4876 }
   4877 
   4878 VG_REGPARM(1)
   4879 void MC_(helperc_value_check1_fail_w_o) ( UWord origin ) {
   4880    MC_(record_value_error) ( VG_(get_running_tid)(), 1, (UInt)origin );
   4881 }
   4882 
   4883 VG_REGPARM(1)
   4884 void MC_(helperc_value_check4_fail_w_o) ( UWord origin ) {
   4885    MC_(record_value_error) ( VG_(get_running_tid)(), 4, (UInt)origin );
   4886 }
   4887 
   4888 VG_REGPARM(1)
   4889 void MC_(helperc_value_check8_fail_w_o) ( UWord origin ) {
   4890    MC_(record_value_error) ( VG_(get_running_tid)(), 8, (UInt)origin );
   4891 }
   4892 
   4893 VG_REGPARM(2)
   4894 void MC_(helperc_value_checkN_fail_w_o) ( HWord sz, UWord origin ) {
   4895    MC_(record_value_error) ( VG_(get_running_tid)(), (Int)sz, (UInt)origin );
   4896 }
   4897 
   4898 /* ... and these when an origin isn't available. */
   4899 
   4900 VG_REGPARM(0)
   4901 void MC_(helperc_value_check0_fail_no_o) ( void ) {
   4902    MC_(record_cond_error) ( VG_(get_running_tid)(), 0/*origin*/ );
   4903 }
   4904 
   4905 VG_REGPARM(0)
   4906 void MC_(helperc_value_check1_fail_no_o) ( void ) {
   4907    MC_(record_value_error) ( VG_(get_running_tid)(), 1, 0/*origin*/ );
   4908 }
   4909 
   4910 VG_REGPARM(0)
   4911 void MC_(helperc_value_check4_fail_no_o) ( void ) {
   4912    MC_(record_value_error) ( VG_(get_running_tid)(), 4, 0/*origin*/ );
   4913 }
   4914 
   4915 VG_REGPARM(0)
   4916 void MC_(helperc_value_check8_fail_no_o) ( void ) {
   4917    MC_(record_value_error) ( VG_(get_running_tid)(), 8, 0/*origin*/ );
   4918 }
   4919 
   4920 VG_REGPARM(1)
   4921 void MC_(helperc_value_checkN_fail_no_o) ( HWord sz ) {
   4922    MC_(record_value_error) ( VG_(get_running_tid)(), (Int)sz, 0/*origin*/ );
   4923 }
   4924 
   4925 
   4926 /*------------------------------------------------------------*/
   4927 /*--- Metadata get/set functions, for client requests.     ---*/
   4928 /*------------------------------------------------------------*/
   4929 
   4930 // Nb: this expands the V+A bits out into register-form V bits, even though
   4931 // they're in memory.  This is for backward compatibility, and because it's
   4932 // probably what the user wants.
   4933 
   4934 /* Copy Vbits from/to address 'a'. Returns: 1 == OK, 2 == alignment
   4935    error [no longer used], 3 == addressing error. */
   4936 /* Nb: We used to issue various definedness/addressability errors from here,
   4937    but we took them out because they ranged from not-very-helpful to
   4938    downright annoying, and they complicated the error data structures. */
   4939 static Int mc_get_or_set_vbits_for_client (
   4940    Addr a,
   4941    Addr vbits,
   4942    SizeT szB,
   4943    Bool setting, /* True <=> set vbits,  False <=> get vbits */
   4944    Bool is_client_request /* True <=> real user request
   4945                              False <=> internal call from gdbserver */
   4946 )
   4947 {
   4948    SizeT i;
   4949    Bool  ok;
   4950    UChar vbits8;
   4951 
   4952    /* Check that the arrays are addressible before doing any getting/setting.
   4953       The vbits array is checked only for a real user request. */
   4954    for (i = 0; i < szB; i++) {
   4955       if (VA_BITS2_NOACCESS == get_vabits2(a + i) ||
   4956           (is_client_request && VA_BITS2_NOACCESS == get_vabits2(vbits + i))) {
   4957          return 3;
   4958       }
   4959    }
   4960 
   4961    /* Do the copy */
   4962    if (setting) {
   4963       /* setting */
   4964       for (i = 0; i < szB; i++) {
   4965          ok = set_vbits8(a + i, ((UChar*)vbits)[i]);
   4966          tl_assert(ok);
   4967       }
   4968    } else {
   4969       /* getting */
   4970       for (i = 0; i < szB; i++) {
   4971          ok = get_vbits8(a + i, &vbits8);
   4972          tl_assert(ok);
   4973          ((UChar*)vbits)[i] = vbits8;
   4974       }
   4975       if (is_client_request)
   4976         // The bytes in vbits[] have now been set, so mark them as such.
   4977         MC_(make_mem_defined)(vbits, szB);
   4978    }
   4979 
   4980    return 1;
   4981 }
   4982 
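        /* A hedged client-side sketch of how the above is reached: the
           VALGRIND_GET_VBITS / VALGRIND_SET_VBITS macros in memcheck.h issue
           the GET_VBITS / SET_VBITS client requests handled later in this
           file, which land here.  'buf' and 'saved' are illustrative names
           only, not part of this file.

              #include <valgrind/memcheck.h>

              char buf[16], saved[16];
              int  r;
              r = VALGRIND_GET_VBITS(buf, saved, sizeof buf);
                                  // 1 == OK, 3 == some bytes unaddressable
              ... buf gets scribbled on ...
              r = VALGRIND_SET_VBITS(buf, saved, sizeof buf);
                                  // restore the saved definedness info
        */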
   4983 
   4984 /*------------------------------------------------------------*/
   4985 /*--- Detecting leaked (unreachable) malloc'd blocks.      ---*/
   4986 /*------------------------------------------------------------*/
   4987 
   4988 /* For the memory leak detector, say whether an entire 64k chunk of
   4989    address space is possibly in use, or not.  If in doubt return
   4990    True.
   4991 */
   4992 Bool MC_(is_within_valid_secondary) ( Addr a )
   4993 {
   4994    SecMap* sm = maybe_get_secmap_for ( a );
   4995    if (sm == NULL || sm == &sm_distinguished[SM_DIST_NOACCESS]) {
   4996       /* Definitely not in use. */
   4997       return False;
   4998    } else {
   4999       return True;
   5000    }
   5001 }
   5002 
   5003 
   5004 /* For the memory leak detector, say whether or not a given word
   5005    address is to be regarded as valid. */
   5006 Bool MC_(is_valid_aligned_word) ( Addr a )
   5007 {
   5008    tl_assert(sizeof(UWord) == 4 || sizeof(UWord) == 8);
   5009    tl_assert(VG_IS_WORD_ALIGNED(a));
   5010    if (get_vabits8_for_aligned_word32 (a) != VA_BITS8_DEFINED)
   5011       return False;
   5012    if (sizeof(UWord) == 8) {
   5013       if (get_vabits8_for_aligned_word32 (a + 4) != VA_BITS8_DEFINED)
   5014          return False;
   5015    }
   5016    if (UNLIKELY(MC_(in_ignored_range)(a)))
   5017       return False;
   5018    else
   5019       return True;
   5020 }
   5021 
   5022 
   5023 /*------------------------------------------------------------*/
   5024 /*--- Initialisation                                       ---*/
   5025 /*------------------------------------------------------------*/
   5026 
   5027 static void init_shadow_memory ( void )
   5028 {
   5029    Int     i;
   5030    SecMap* sm;
   5031 
   5032    tl_assert(V_BIT_UNDEFINED   == 1);
   5033    tl_assert(V_BIT_DEFINED     == 0);
   5034    tl_assert(V_BITS8_UNDEFINED == 0xFF);
   5035    tl_assert(V_BITS8_DEFINED   == 0);
   5036 
   5037    /* Build the 3 distinguished secondaries */
   5038    sm = &sm_distinguished[SM_DIST_NOACCESS];
   5039    for (i = 0; i < SM_CHUNKS; i++) sm->vabits8[i] = VA_BITS8_NOACCESS;
   5040 
   5041    sm = &sm_distinguished[SM_DIST_UNDEFINED];
   5042    for (i = 0; i < SM_CHUNKS; i++) sm->vabits8[i] = VA_BITS8_UNDEFINED;
   5043 
   5044    sm = &sm_distinguished[SM_DIST_DEFINED];
   5045    for (i = 0; i < SM_CHUNKS; i++) sm->vabits8[i] = VA_BITS8_DEFINED;
   5046 
   5047    /* Set up the primary map. */
   5048    /* These entries gradually get overwritten as the used address
   5049       space expands. */
   5050    for (i = 0; i < N_PRIMARY_MAP; i++)
   5051       primary_map[i] = &sm_distinguished[SM_DIST_NOACCESS];
   5052 
   5053    /* Auxiliary primary maps */
   5054    init_auxmap_L1_L2();
   5055 
   5056    /* auxmap_size = auxmap_used = 0;
   5057       no ... these are statically initialised */
   5058 
   5059    /* Secondary V bit table */
   5060    secVBitTable = createSecVBitTable();
   5061 }
   5062 
   5063 
   5064 /*------------------------------------------------------------*/
   5065 /*--- Sanity check machinery (permanently engaged)         ---*/
   5066 /*------------------------------------------------------------*/
   5067 
   5068 static Bool mc_cheap_sanity_check ( void )
   5069 {
   5070    n_sanity_cheap++;
   5071    PROF_EVENT(490, "cheap_sanity_check");
   5072    /* Check for sane operating level */
   5073    if (MC_(clo_mc_level) < 1 || MC_(clo_mc_level) > 3)
   5074       return False;
   5075    /* nothing else useful we can rapidly check */
   5076    return True;
   5077 }
   5078 
   5079 static Bool mc_expensive_sanity_check ( void )
   5080 {
   5081    Int     i;
   5082    Word    n_secmaps_found;
   5083    SecMap* sm;
   5084    const HChar*  errmsg;
   5085    Bool    bad = False;
   5086 
   5087    if (0) VG_(printf)("expensive sanity check\n");
   5088    if (0) return True;
   5089 
   5090    n_sanity_expensive++;
   5091    PROF_EVENT(491, "expensive_sanity_check");
   5092 
   5093    /* Check for sane operating level */
   5094    if (MC_(clo_mc_level) < 1 || MC_(clo_mc_level) > 3)
   5095       return False;
   5096 
   5097    /* Check that the 3 distinguished SMs are still as they should be. */
   5098 
   5099    /* Check noaccess DSM. */
   5100    sm = &sm_distinguished[SM_DIST_NOACCESS];
   5101    for (i = 0; i < SM_CHUNKS; i++)
   5102       if (sm->vabits8[i] != VA_BITS8_NOACCESS)
   5103          bad = True;
   5104 
   5105    /* Check undefined DSM. */
   5106    sm = &sm_distinguished[SM_DIST_UNDEFINED];
   5107    for (i = 0; i < SM_CHUNKS; i++)
   5108       if (sm->vabits8[i] != VA_BITS8_UNDEFINED)
   5109          bad = True;
   5110 
   5111    /* Check defined DSM. */
   5112    sm = &sm_distinguished[SM_DIST_DEFINED];
   5113    for (i = 0; i < SM_CHUNKS; i++)
   5114       if (sm->vabits8[i] != VA_BITS8_DEFINED)
   5115          bad = True;
   5116 
   5117    if (bad) {
   5118       VG_(printf)("memcheck expensive sanity: "
   5119                   "distinguished_secondaries have changed\n");
   5120       return False;
   5121    }
   5122 
   5123    /* If we're not checking for undefined value errors, the secondary V bit
   5124     * table should be empty. */
   5125    if (MC_(clo_mc_level) == 1) {
   5126       if (0 != VG_(OSetGen_Size)(secVBitTable))
   5127          return False;
   5128    }
   5129 
   5130    /* check the auxiliary maps, very thoroughly */
   5131    n_secmaps_found = 0;
   5132    errmsg = check_auxmap_L1_L2_sanity( &n_secmaps_found );
   5133    if (errmsg) {
   5134       VG_(printf)("memcheck expensive sanity, auxmaps:\n\t%s", errmsg);
   5135       return False;
   5136    }
   5137 
   5138    /* n_secmaps_found is now the number referred to by the auxiliary
   5139       primary map.  Now add on the ones referred to by the main
   5140       primary map. */
   5141    for (i = 0; i < N_PRIMARY_MAP; i++) {
   5142       if (primary_map[i] == NULL) {
   5143          bad = True;
   5144       } else {
   5145          if (!is_distinguished_sm(primary_map[i]))
   5146             n_secmaps_found++;
   5147       }
   5148    }
   5149 
   5150    /* check that the number of secmaps issued matches the number that
   5151       are reachable (iow, no secmap leaks) */
   5152    if (n_secmaps_found != (n_issued_SMs - n_deissued_SMs))
   5153       bad = True;
   5154 
   5155    if (bad) {
   5156       VG_(printf)("memcheck expensive sanity: "
   5157                   "apparent secmap leakage\n");
   5158       return False;
   5159    }
   5160 
   5166 
   5167    /* there is only one pointer to each secmap (expensive) */
   5168 
   5169    return True;
   5170 }
   5171 
   5172 /*------------------------------------------------------------*/
   5173 /*--- Command line args                                    ---*/
   5174 /*------------------------------------------------------------*/
   5175 
   5176 
   5177 Bool          MC_(clo_partial_loads_ok)       = False;
   5178 Long          MC_(clo_freelist_vol)           = 20*1000*1000LL;
   5179 Long          MC_(clo_freelist_big_blocks)    =  1*1000*1000LL;
   5180 LeakCheckMode MC_(clo_leak_check)             = LC_Summary;
   5181 VgRes         MC_(clo_leak_resolution)        = Vg_HighRes;
   5182 UInt          MC_(clo_show_leak_kinds)        = R2S(Possible) | R2S(Unreached);
   5183 UInt          MC_(clo_error_for_leak_kinds)   = R2S(Possible) | R2S(Unreached);
   5184 UInt          MC_(clo_leak_check_heuristics)  = 0;
   5185 Bool          MC_(clo_workaround_gcc296_bugs) = False;
   5186 Int           MC_(clo_malloc_fill)            = -1;
   5187 Int           MC_(clo_free_fill)              = -1;
   5188 KeepStacktraces MC_(clo_keep_stacktraces)     = KS_alloc_then_free;
   5189 Int           MC_(clo_mc_level)               = 2;
   5190 
   5191 static Bool MC_(parse_leak_heuristics) ( const HChar *str0, UInt *lhs )
   5192 {
   5193    return  VG_(parse_enum_set) ("-,stdstring,newarray,multipleinheritance",
   5194                                 str0, lhs);
   5195 }
   5196 
   5197 
   5198 static Bool mc_process_cmd_line_options(const HChar* arg)
   5199 {
   5200    const HChar* tmp_str;
   5201    Int   tmp_show;
   5202 
   5203    tl_assert( MC_(clo_mc_level) >= 1 && MC_(clo_mc_level) <= 3 );
   5204 
   5205    /* Set MC_(clo_mc_level):
   5206          1 = A bit tracking only
   5207          2 = A and V bit tracking, but no V bit origins
   5208          3 = A and V bit tracking, and V bit origins
   5209 
   5210       Do this by inspecting --undef-value-errors= and
   5211       --track-origins=.  Reject the case --undef-value-errors=no
   5212       --track-origins=yes as meaningless.
   5213    */
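           /* For example (a sketch; the command lines are illustrative):
                 valgrind --tool=memcheck prog                          -> level 2
                 valgrind --tool=memcheck --undef-value-errors=no prog  -> level 1
                 valgrind --tool=memcheck --track-origins=yes prog      -> level 3
              and "--undef-value-errors=no --track-origins=yes" is rejected
              below via bad_level. */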
   5214    if (0 == VG_(strcmp)(arg, "--undef-value-errors=no")) {
   5215       if (MC_(clo_mc_level) == 3) {
   5216          goto bad_level;
   5217       } else {
   5218          MC_(clo_mc_level) = 1;
   5219          return True;
   5220       }
   5221    }
   5222    if (0 == VG_(strcmp)(arg, "--undef-value-errors=yes")) {
   5223       if (MC_(clo_mc_level) == 1)
   5224          MC_(clo_mc_level) = 2;
   5225       return True;
   5226    }
   5227    if (0 == VG_(strcmp)(arg, "--track-origins=no")) {
   5228       if (MC_(clo_mc_level) == 3)
   5229          MC_(clo_mc_level) = 2;
   5230       return True;
   5231    }
   5232    if (0 == VG_(strcmp)(arg, "--track-origins=yes")) {
   5233       if (MC_(clo_mc_level) == 1) {
   5234          goto bad_level;
   5235       } else {
   5236          MC_(clo_mc_level) = 3;
   5237          return True;
   5238       }
   5239    }
   5240 
   5241         if VG_BOOL_CLO(arg, "--partial-loads-ok", MC_(clo_partial_loads_ok)) {}
   5242    else if VG_STR_CLO(arg, "--errors-for-leak-kinds" , tmp_str) {
   5243       if (!MC_(parse_leak_kinds)(tmp_str, &MC_(clo_error_for_leak_kinds)))
   5244          return False;
   5245    }
   5246    else if VG_STR_CLO(arg, "--show-leak-kinds", tmp_str) {
   5247       if (!MC_(parse_leak_kinds)(tmp_str, &MC_(clo_show_leak_kinds)))
   5248          return False;
   5249    }
   5250    else if VG_STR_CLO(arg, "--leak-check-heuristics", tmp_str) {
   5251       if (!MC_(parse_leak_heuristics)(tmp_str, &MC_(clo_leak_check_heuristics)))
   5252          return False;
   5253    }
   5254    else if (VG_BOOL_CLO(arg, "--show-reachable", tmp_show)) {
   5255       if (tmp_show) {
   5256          MC_(clo_show_leak_kinds) = RallS;
   5257       } else {
   5258          MC_(clo_show_leak_kinds) &= ~R2S(Reachable);
   5259       }
   5260    }
   5261    else if VG_BOOL_CLO(arg, "--show-possibly-lost", tmp_show) {
   5262       if (tmp_show) {
   5263          MC_(clo_show_leak_kinds) |= R2S(Possible);
   5264       } else {
   5265          MC_(clo_show_leak_kinds) &= ~R2S(Possible);
   5266       }
   5267    }
   5268    else if VG_BOOL_CLO(arg, "--workaround-gcc296-bugs",
   5269                                             MC_(clo_workaround_gcc296_bugs)) {}
   5270 
   5271    else if VG_BINT_CLO(arg, "--freelist-vol",  MC_(clo_freelist_vol),
   5272                                                0, 10*1000*1000*1000LL) {}
   5273 
   5274    else if VG_BINT_CLO(arg, "--freelist-big-blocks",
   5275                        MC_(clo_freelist_big_blocks),
   5276                        0, 10*1000*1000*1000LL) {}
   5277 
   5278    else if VG_XACT_CLO(arg, "--leak-check=no",
   5279                             MC_(clo_leak_check), LC_Off) {}
   5280    else if VG_XACT_CLO(arg, "--leak-check=summary",
   5281                             MC_(clo_leak_check), LC_Summary) {}
   5282    else if VG_XACT_CLO(arg, "--leak-check=yes",
   5283                             MC_(clo_leak_check), LC_Full) {}
   5284    else if VG_XACT_CLO(arg, "--leak-check=full",
   5285                             MC_(clo_leak_check), LC_Full) {}
   5286 
   5287    else if VG_XACT_CLO(arg, "--leak-resolution=low",
   5288                             MC_(clo_leak_resolution), Vg_LowRes) {}
   5289    else if VG_XACT_CLO(arg, "--leak-resolution=med",
   5290                             MC_(clo_leak_resolution), Vg_MedRes) {}
   5291    else if VG_XACT_CLO(arg, "--leak-resolution=high",
   5292                             MC_(clo_leak_resolution), Vg_HighRes) {}
   5293 
   5294    else if VG_STR_CLO(arg, "--ignore-ranges", tmp_str) {
   5295       Bool ok = parse_ignore_ranges(tmp_str);
   5296       if (!ok) {
   5297          VG_(message)(Vg_DebugMsg,
   5298             "ERROR: --ignore-ranges: "
   5299             "invalid syntax, or end <= start in range\n");
   5300          return False;
   5301       }
   5302       if (gIgnoredAddressRanges) {
   5303          Word i;
   5304          for (i = 0; i < VG_(sizeRangeMap)(gIgnoredAddressRanges); i++) {
   5305             UWord val     = IAR_INVALID;
   5306             UWord key_min = ~(UWord)0;
   5307             UWord key_max = (UWord)0;
   5308             VG_(indexRangeMap)( &key_min, &key_max, &val,
   5309                                 gIgnoredAddressRanges, i );
   5310             tl_assert(key_min <= key_max);
   5311             UWord limit = 0x4000000; /* 64M - entirely arbitrary limit */
   5312             if (key_max - key_min > limit) {
   5313                VG_(message)(Vg_DebugMsg,
   5314                   "ERROR: --ignore-ranges: suspiciously large range:\n");
   5315                VG_(message)(Vg_DebugMsg,
   5316                    "       0x%lx-0x%lx (size %ld)\n", key_min, key_max,
   5317                    key_max - key_min + 1);
   5318                return False;
   5319             }
   5320          }
   5321       }
   5322    }
   5323 
   5324    else if VG_BHEX_CLO(arg, "--malloc-fill", MC_(clo_malloc_fill), 0x00,0xFF) {}
   5325    else if VG_BHEX_CLO(arg, "--free-fill",   MC_(clo_free_fill),   0x00,0xFF) {}
   5326 
   5327    else if VG_XACT_CLO(arg, "--keep-stacktraces=alloc",
   5328                        MC_(clo_keep_stacktraces), KS_alloc) {}
   5329    else if VG_XACT_CLO(arg, "--keep-stacktraces=free",
   5330                        MC_(clo_keep_stacktraces), KS_free) {}
   5331    else if VG_XACT_CLO(arg, "--keep-stacktraces=alloc-and-free",
   5332                        MC_(clo_keep_stacktraces), KS_alloc_and_free) {}
   5333    else if VG_XACT_CLO(arg, "--keep-stacktraces=alloc-then-free",
   5334                        MC_(clo_keep_stacktraces), KS_alloc_then_free) {}
   5335    else if VG_XACT_CLO(arg, "--keep-stacktraces=none",
   5336                        MC_(clo_keep_stacktraces), KS_none) {}
   5337 
   5338    else
   5339       return VG_(replacement_malloc_process_cmd_line_option)(arg);
   5340 
   5341    return True;
   5342 
   5343 
   5344   bad_level:
   5345    VG_(fmsg_bad_option)(arg,
   5346       "--track-origins=yes has no effect when --undef-value-errors=no.\n");
   5347 }
   5348 
   5349 static void mc_print_usage(void)
   5350 {
   5351    VG_(printf)(
   5352 "    --leak-check=no|summary|full     search for memory leaks at exit?  [summary]\n"
   5353 "    --leak-resolution=low|med|high   differentiation of leak stack traces [high]\n"
   5354 "    --show-leak-kinds=kind1,kind2,.. which leak kinds to show?\n"
   5355 "                                            [definite,possible]\n"
   5356 "    --errors-for-leak-kinds=kind1,kind2,..  which leak kinds are errors?\n"
   5357 "                                            [definite,possible]\n"
   5358 "        where kind is one of definite indirect possible reachable all none\n"
   5359 "    --leak-check-heuristics=heur1,heur2,... which heuristics to use for\n"
   5360 "        improving leak search false positive [none]\n"
   5361 "        where heur is one of stdstring newarray multipleinheritance all none\n"
   5362 "    --show-reachable=yes             same as --show-leak-kinds=all\n"
   5363 "    --show-reachable=no --show-possibly-lost=yes\n"
   5364 "                                     same as --show-leak-kinds=definite,possible\n"
   5365 "    --show-reachable=no --show-possibly-lost=no\n"
   5366 "                                     same as --show-leak-kinds=definite\n"
   5367 "    --undef-value-errors=no|yes      check for undefined value errors [yes]\n"
   5368 "    --track-origins=no|yes           show origins of undefined values? [no]\n"
   5369 "    --partial-loads-ok=no|yes        too hard to explain here; see manual [no]\n"
   5370 "    --freelist-vol=<number>          volume of freed blocks queue     [20000000]\n"
   5371 "    --freelist-big-blocks=<number>   releases first blocks with size>= [1000000]\n"
   5372 "    --workaround-gcc296-bugs=no|yes  self explanatory [no]\n"
   5373 "    --ignore-ranges=0xPP-0xQQ[,0xRR-0xSS]   assume given addresses are OK\n"
   5374 "    --malloc-fill=<hexnumber>        fill malloc'd areas with given value\n"
   5375 "    --free-fill=<hexnumber>          fill free'd areas with given value\n"
   5376 "    --keep-stacktraces=alloc|free|alloc-and-free|alloc-then-free|none\n"
   5377 "        stack trace(s) to keep for malloc'd/free'd areas       [alloc-then-free]\n"
   5378    );
   5379 }
   5380 
   5381 static void mc_print_debug_usage(void)
   5382 {
   5383    VG_(printf)(
   5384 "    (none)\n"
   5385    );
   5386 }
   5387 
   5388 
   5389 /*------------------------------------------------------------*/
   5390 /*--- Client blocks                                        ---*/
   5391 /*------------------------------------------------------------*/
   5392 
   5393 /* Client block management:
   5394 
   5395    This is managed as an expanding array of client block descriptors.
   5396    Indices of live descriptors are issued to the client, so it can ask
   5397    to free them later.  Therefore we cannot slide live entries down
   5398    over dead ones.  Instead we must use free/inuse flags and scan for
   5399    an empty slot at allocation time.  This in turn means allocation is
   5400    relatively expensive, so we hope this does not happen too often.
   5401 
   5402    An unused block has start == size == 0
   5403 */
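        /* A hedged client-side sketch of the interface to this subsystem:
           the VALGRIND_CREATE_BLOCK and VALGRIND_DISCARD macros in
           memcheck.h issue the CREATE_BLOCK / DISCARD client requests
           handled further below.  'arena' is an illustrative name only.

              char arena[256];
              int  hdl = VALGRIND_CREATE_BLOCK(arena, sizeof arena, "arena");
              ...            // errors on 'arena' now also mention "arena"
              VALGRIND_DISCARD(hdl);   // 0 == ok, 1 == invalid handle
        */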
   5404 
   5405 /* type CGenBlock is defined in mc_include.h */
   5406 
   5407 /* This subsystem is self-initialising. */
   5408 static UWord      cgb_size = 0;
   5409 static UWord      cgb_used = 0;
   5410 static CGenBlock* cgbs     = NULL;
   5411 
   5412 /* Stats for this subsystem. */
   5413 static ULong cgb_used_MAX = 0;   /* Max in use. */
   5414 static ULong cgb_allocs   = 0;   /* Number of allocs. */
   5415 static ULong cgb_discards = 0;   /* Number of discards. */
   5416 static ULong cgb_search   = 0;   /* Number of searches. */
   5417 
   5418 
   5419 /* Get access to the client block array. */
   5420 void MC_(get_ClientBlock_array)( /*OUT*/CGenBlock** blocks,
   5421                                  /*OUT*/UWord* nBlocks )
   5422 {
   5423    *blocks  = cgbs;
   5424    *nBlocks = cgb_used;
   5425 }
   5426 
   5427 
   5428 static
   5429 Int alloc_client_block ( void )
   5430 {
   5431    UWord      i, sz_new;
   5432    CGenBlock* cgbs_new;
   5433 
   5434    cgb_allocs++;
   5435 
   5436    for (i = 0; i < cgb_used; i++) {
   5437       cgb_search++;
   5438       if (cgbs[i].start == 0 && cgbs[i].size == 0)
   5439          return i;
   5440    }
   5441 
   5442    /* Not found.  Try to allocate one at the end. */
   5443    if (cgb_used < cgb_size) {
   5444       cgb_used++;
   5445       return cgb_used-1;
   5446    }
   5447 
   5448    /* Ok, we have to allocate a new one. */
   5449    tl_assert(cgb_used == cgb_size);
   5450    sz_new = (cgbs == NULL) ? 10 : (2 * cgb_size);
   5451 
   5452    cgbs_new = VG_(malloc)( "mc.acb.1", sz_new * sizeof(CGenBlock) );
   5453    for (i = 0; i < cgb_used; i++)
   5454       cgbs_new[i] = cgbs[i];
   5455 
   5456    if (cgbs != NULL)
   5457       VG_(free)( cgbs );
   5458    cgbs = cgbs_new;
   5459 
   5460    cgb_size = sz_new;
   5461    cgb_used++;
   5462    if (cgb_used > cgb_used_MAX)
   5463       cgb_used_MAX = cgb_used;
   5464    return cgb_used-1;
   5465 }
   5466 
   5467 
   5468 static void show_client_block_stats ( void )
   5469 {
   5470    VG_(message)(Vg_DebugMsg,
   5471       "general CBs: %llu allocs, %llu discards, %llu maxinuse, %llu search\n",
   5472       cgb_allocs, cgb_discards, cgb_used_MAX, cgb_search
   5473    );
   5474 }
   5475 static void print_monitor_help ( void )
   5476 {
   5477    VG_(gdb_printf)
   5478       (
   5479 "\n"
   5480 "memcheck monitor commands:\n"
   5481 "  get_vbits <addr> [<len>]\n"
   5482 "        returns validity bits for <len> (or 1) bytes at <addr>\n"
   5483 "            bit values 0 = valid, 1 = invalid, __ = unaddressable byte\n"
   5484 "        Example: get_vbits 0x8049c78 10\n"
   5485 "  make_memory [noaccess|undefined\n"
   5486 "                     |defined|Definedifaddressable] <addr> [<len>]\n"
   5487 "        mark <len> (or 1) bytes at <addr> with the given accessibility\n"
   5488 "  check_memory [addressable|defined] <addr> [<len>]\n"
   5489 "        check that <len> (or 1) bytes at <addr> have the given accessibility\n"
   5490 "            and outputs a description of <addr>\n"
   5491 "  leak_check [full*|summary]\n"
   5492 "                [kinds kind1,kind2,...|reachable|possibleleak*|definiteleak]\n"
   5493 "                [heuristics heur1,heur2,...]\n"
   5494 "                [increased*|changed|any]\n"
   5495 "                [unlimited*|limited <max_loss_records_output>]\n"
   5496 "            * = defaults\n"
   5497 "       where kind is one of definite indirect possible reachable all none\n"
   5498 "       where heur is one of stdstring newarray multipleinheritance all none*\n"
   5499 "        Examples: leak_check\n"
   5500 "                  leak_check summary any\n"
   5501 "                  leak_check full kinds indirect,possible\n"
   5502 "                  leak_check full reachable any limited 100\n"
   5503 "  block_list <loss_record_nr>\n"
   5504 "        after a leak search, shows the list of blocks of <loss_record_nr>\n"
   5505 "  who_points_at <addr> [<len>]\n"
   5506 "        shows places pointing inside <len> (default 1) bytes at <addr>\n"
   5507 "        (with len 1, only shows \"start pointers\" pointing exactly to <addr>,\n"
   5508 "         with len > 1, will also show \"interior pointers\")\n"
   5509 "\n");
   5510 }
   5511 
   5512 /* return True if request recognised, False otherwise */
   5513 static Bool handle_gdb_monitor_command (ThreadId tid, HChar *req)
   5514 {
   5515    HChar* wcmd;
   5516    HChar s[VG_(strlen(req)) + 1]; /* copy for strtok_r */
   5517    HChar *ssaveptr;
   5518 
   5519    VG_(strcpy) (s, req);
   5520 
   5521    wcmd = VG_(strtok_r) (s, " ", &ssaveptr);
   5522    /* NB: if possible, avoid introducing a new command below which
   5523       starts with the same first letter(s) as an already existing
   5524       command. This ensures a shorter abbreviation for the user. */
   5525    switch (VG_(keyword_id)
   5526            ("help get_vbits leak_check make_memory check_memory "
   5527             "block_list who_points_at",
   5528             wcmd, kwd_report_duplicated_matches)) {
   5529    case -2: /* multiple matches */
   5530       return True;
   5531    case -1: /* not found */
   5532       return False;
   5533    case  0: /* help */
   5534       print_monitor_help();
   5535       return True;
   5536    case  1: { /* get_vbits */
   5537       Addr address;
   5538       SizeT szB = 1;
   5539       if (VG_(strtok_get_address_and_size) (&address, &szB, &ssaveptr)) {
   5540          UChar vbits;
   5541          Int i;
   5542          Int unaddressable = 0;
   5543          for (i = 0; i < szB; i++) {
   5544             Int res = mc_get_or_set_vbits_for_client
   5545                (address+i, (Addr) &vbits, 1,
   5546                 False, /* get them */
   5547                 False  /* is client request */ );
   5548             /* about to start a new output line, so print a \n first. */
   5549             if ((i % 32) == 0 && i != 0)
   5550                VG_(printf) ("\n");
   5551             /* about to start the next group of 4 bytes, so print a space. */
   5552             else if ((i % 4) == 0 && i != 0)
   5553                VG_(printf) (" ");
   5554             if (res == 1) {
   5555                VG_(printf) ("%02x", vbits);
   5556             } else {
   5557                tl_assert(3 == res);
   5558                unaddressable++;
   5559                VG_(printf) ("__");
   5560             }
   5561          }
   5562          VG_(printf) ("\n");
   5563          if (unaddressable) {
   5564             VG_(printf)
   5565                ("Address %p len %ld has %d bytes unaddressable\n",
   5566                 (void *)address, szB, unaddressable);
   5567          }
   5568       }
   5569       return True;
   5570    }
   5571    case  2: { /* leak_check */
   5572       Int err = 0;
   5573       LeakCheckParams lcp;
   5574       HChar* kw;
   5575 
   5576       lcp.mode               = LC_Full;
   5577       lcp.show_leak_kinds    = R2S(Possible) | R2S(Unreached);
   5578       lcp.errors_for_leak_kinds = 0; // no errors for interactive leak search.
   5579       lcp.heuristics         = 0;
   5580       lcp.deltamode          = LCD_Increased;
   5581       lcp.max_loss_records_output = 999999999;
   5582       lcp.requested_by_monitor_command = True;
   5583 
   5584       for (kw = VG_(strtok_r) (NULL, " ", &ssaveptr);
   5585            kw != NULL;
   5586            kw = VG_(strtok_r) (NULL, " ", &ssaveptr)) {
   5587          switch (VG_(keyword_id)
   5588                  ("full summary "
   5589                   "kinds reachable possibleleak definiteleak "
   5590                   "heuristics "
   5591                   "increased changed any "
   5592                   "unlimited limited ",
   5593                   kw, kwd_report_all)) {
   5594          case -2: err++; break;
   5595          case -1: err++; break;
   5596          case  0: /* full */
   5597             lcp.mode = LC_Full; break;
   5598          case  1: /* summary */
   5599             lcp.mode = LC_Summary; break;
   5600          case  2: { /* kinds */
   5601             wcmd = VG_(strtok_r) (NULL, " ", &ssaveptr);
   5602             if (wcmd == NULL || !MC_(parse_leak_kinds)(wcmd,
   5603                                                        &lcp.show_leak_kinds)) {
   5604                VG_(gdb_printf) ("missing or malformed leak kinds set\n");
   5605                err++;
   5606             }
   5607             break;
   5608          }
   5609          case  3: /* reachable */
   5610             lcp.show_leak_kinds = RallS;
   5611             break;
   5612          case  4: /* possibleleak */
   5613             lcp.show_leak_kinds
   5614                = R2S(Possible) | R2S(IndirectLeak) | R2S(Unreached);
   5615             break;
   5616          case  5: /* definiteleak */
   5617             lcp.show_leak_kinds = R2S(Unreached);
   5618             break;
   5619          case  6: { /* heuristics */
   5620             wcmd = VG_(strtok_r) (NULL, " ", &ssaveptr);
   5621             if (wcmd == NULL || !MC_(parse_leak_heuristics)(wcmd,
   5622                                                             &lcp.heuristics)) {
   5623                VG_(gdb_printf) ("missing or malformed heuristics set\n");
   5624                err++;
   5625             }
   5626             break;
   5627          }
   5628          case  7: /* increased */
   5629             lcp.deltamode = LCD_Increased; break;
   5630          case  8: /* changed */
   5631             lcp.deltamode = LCD_Changed; break;
   5632          case  9: /* any */
   5633             lcp.deltamode = LCD_Any; break;
   5634          case 10: /* unlimited */
   5635             lcp.max_loss_records_output = 999999999; break;
   5636          case 11: { /* limited */
   5637             Int int_value;
   5638             const HChar* endptr;
   5639 
   5640             wcmd = VG_(strtok_r) (NULL, " ", &ssaveptr);
   5641             if (wcmd == NULL) {
   5642                int_value = 0;
   5643                endptr = "empty"; /* to report an error below */
   5644             } else {
   5645                HChar *the_end;
   5646                int_value = VG_(strtoll10) (wcmd, &the_end);
   5647                endptr = the_end;
   5648             }
   5649             if (*endptr != '\0')
   5650                VG_(gdb_printf) ("missing or malformed integer value\n");
   5651             else if (int_value > 0)
   5652                lcp.max_loss_records_output = (UInt) int_value;
   5653             else
   5654                VG_(gdb_printf) ("max_loss_records_output must be >= 1, got %d\n",
   5655                                 int_value);
   5656             break;
   5657          }
   5658          default:
   5659             tl_assert (0);
   5660          }
   5661       }
   5662       if (!err)
   5663          MC_(detect_memory_leaks)(tid, &lcp);
   5664       return True;
   5665    }
   5666 
   5667    case  3: { /* make_memory */
   5668       Addr address;
   5669       SizeT szB = 1;
   5670       Int kwdid = VG_(keyword_id)
   5671          ("noaccess undefined defined Definedifaddressable",
   5672           VG_(strtok_r) (NULL, " ", &ssaveptr), kwd_report_all);
   5673       if (!VG_(strtok_get_address_and_size) (&address, &szB, &ssaveptr))
   5674          return True;
   5675       switch (kwdid) {
   5676       case -2: break;
   5677       case -1: break;
   5678       case  0: MC_(make_mem_noaccess) (address, szB); break;
   5679       case  1: make_mem_undefined_w_tid_and_okind ( address, szB, tid,
   5680                                                     MC_OKIND_USER ); break;
   5681       case  2: MC_(make_mem_defined) ( address, szB ); break;
   5682       case  3: make_mem_defined_if_addressable ( address, szB ); break;
   5683       default: tl_assert(0);
   5684       }
   5685       return True;
   5686    }
   5687 
   5688    case  4: { /* check_memory */
   5689       Addr address;
   5690       SizeT szB = 1;
   5691       Addr bad_addr;
   5692       UInt okind;
   5693       const HChar* src;
   5694       UInt otag;
   5695       UInt ecu;
   5696       ExeContext* origin_ec;
   5697       MC_ReadResult res;
   5698 
   5699       Int kwdid = VG_(keyword_id)
   5700          ("addressable defined",
   5701           VG_(strtok_r) (NULL, " ", &ssaveptr), kwd_report_all);
   5702       if (!VG_(strtok_get_address_and_size) (&address, &szB, &ssaveptr))
   5703          return True;
   5704       switch (kwdid) {
   5705       case -2: break;
   5706       case -1: break;
   5707       case  0: /* addressable */
   5708          if (is_mem_addressable ( address, szB, &bad_addr ))
   5709             VG_(printf) ("Address %p len %ld addressable\n",
   5710                              (void *)address, szB);
   5711          else
   5712             VG_(printf)
   5713                ("Address %p len %ld not addressable:\nbad address %p\n",
   5714                 (void *)address, szB, (void *) bad_addr);
   5715          MC_(pp_describe_addr) (address);
   5716          break;
   5717       case  1: /* defined */
   5718          res = is_mem_defined ( address, szB, &bad_addr, &otag );
   5719          if (MC_AddrErr == res)
   5720             VG_(printf)
   5721                ("Address %p len %ld not addressable:\nbad address %p\n",
   5722                 (void *)address, szB, (void *) bad_addr);
   5723          else if (MC_ValueErr == res) {
   5724             okind = otag & 3;
   5725             switch (okind) {
   5726             case MC_OKIND_STACK:
   5727                src = " was created by a stack allocation"; break;
   5728             case MC_OKIND_HEAP:
   5729                src = " was created by a heap allocation"; break;
   5730             case MC_OKIND_USER:
   5731                src = " was created by a client request"; break;
   5732             case MC_OKIND_UNKNOWN:
   5733                src = ""; break;
   5734             default: tl_assert(0);
   5735             }
   5736             VG_(printf)
   5737                ("Address %p len %ld not defined:\n"
   5738                 "Uninitialised value at %p%s\n",
   5739                 (void *)address, szB, (void *) bad_addr, src);
   5740             ecu = otag & ~3;
   5741             if (VG_(is_plausible_ECU)(ecu)) {
   5742                origin_ec = VG_(get_ExeContext_from_ECU)( ecu );
   5743                VG_(pp_ExeContext)( origin_ec );
   5744             }
   5745          }
   5746          else
   5747             VG_(printf) ("Address %p len %ld defined\n",
   5748                          (void *)address, szB);
   5749          MC_(pp_describe_addr) (address);
   5750          break;
   5751       default: tl_assert(0);
   5752       }
   5753       return True;
   5754    }
   5755 
   5756    case  5: { /* block_list */
   5757       HChar* wl;
   5758       HChar *endptr;
   5759       UInt lr_nr = 0;
   5760       wl = VG_(strtok_r) (NULL, " ", &ssaveptr);
   5761       if (wl != NULL)
   5762          lr_nr = VG_(strtoull10) (wl, &endptr);
   5763       if (wl == NULL || *endptr != '\0') {
   5764          VG_(gdb_printf) ("malformed or missing integer\n");
   5765       } else {
   5766          // Use lr_nr-1: the number shown to the user is 1 more than the index in lr_array.
   5767          if (lr_nr == 0 || ! MC_(print_block_list) (lr_nr-1))
   5768             VG_(gdb_printf) ("invalid loss record nr\n");
   5769       }
   5770       return True;
   5771    }
   5772 
   5773    case  6: { /* who_points_at */
   5774       Addr address;
   5775       SizeT szB = 1;
   5776 
   5777       if (!VG_(strtok_get_address_and_size) (&address, &szB, &ssaveptr))
   5778          return True;
   5779       if (address == (Addr) 0) {
   5780          VG_(gdb_printf) ("Cannot search who points at 0x0\n");
   5781          return True;
   5782       }
   5783       MC_(who_points_at) (address, szB);
   5784       return True;
   5785    }
   5786 
   5787    default:
   5788       tl_assert(0);
   5789       return False;
   5790    }
   5791 }
   5792 
   5793 /*------------------------------------------------------------*/
   5794 /*--- Client requests                                      ---*/
   5795 /*------------------------------------------------------------*/
   5796 
   5797 static Bool mc_handle_client_request ( ThreadId tid, UWord* arg, UWord* ret )
   5798 {
   5799    Int   i;
   5800    Addr  bad_addr;
   5801 
   5802    if (!VG_IS_TOOL_USERREQ('M','C',arg[0])
   5803        && VG_USERREQ__MALLOCLIKE_BLOCK != arg[0]
   5804        && VG_USERREQ__RESIZEINPLACE_BLOCK != arg[0]
   5805        && VG_USERREQ__FREELIKE_BLOCK   != arg[0]
   5806        && VG_USERREQ__CREATE_MEMPOOL   != arg[0]
   5807        && VG_USERREQ__DESTROY_MEMPOOL  != arg[0]
   5808        && VG_USERREQ__MEMPOOL_ALLOC    != arg[0]
   5809        && VG_USERREQ__MEMPOOL_FREE     != arg[0]
   5810        && VG_USERREQ__MEMPOOL_TRIM     != arg[0]
   5811        && VG_USERREQ__MOVE_MEMPOOL     != arg[0]
   5812        && VG_USERREQ__MEMPOOL_CHANGE   != arg[0]
   5813        && VG_USERREQ__MEMPOOL_EXISTS   != arg[0]
   5814        && VG_USERREQ__GDB_MONITOR_COMMAND   != arg[0]
   5815        && VG_USERREQ__ENABLE_ADDR_ERROR_REPORTING_IN_RANGE != arg[0]
   5816        && VG_USERREQ__DISABLE_ADDR_ERROR_REPORTING_IN_RANGE != arg[0])
   5817       return False;
   5818 
   5819    switch (arg[0]) {
   5820       case VG_USERREQ__CHECK_MEM_IS_ADDRESSABLE: {
   5821          Bool ok = is_mem_addressable ( arg[1], arg[2], &bad_addr );
   5822          if (!ok)
   5823             MC_(record_user_error) ( tid, bad_addr, /*isAddrErr*/True, 0 );
   5824          *ret = ok ? (UWord)NULL : bad_addr;
   5825          break;
   5826       }
   5827 
   5828       case VG_USERREQ__CHECK_MEM_IS_DEFINED: {
   5829          Bool errorV    = False;
   5830          Addr bad_addrV = 0;
   5831          UInt otagV     = 0;
   5832          Bool errorA    = False;
   5833          Addr bad_addrA = 0;
   5834          is_mem_defined_comprehensive(
   5835             arg[1], arg[2],
   5836             &errorV, &bad_addrV, &otagV, &errorA, &bad_addrA
   5837          );
   5838          if (errorV) {
   5839             MC_(record_user_error) ( tid, bad_addrV,
   5840                                      /*isAddrErr*/False, otagV );
   5841          }
   5842          if (errorA) {
   5843             MC_(record_user_error) ( tid, bad_addrA,
   5844                                      /*isAddrErr*/True, 0 );
   5845          }
   5846          /* Return the lower of the two erring addresses, if any. */
   5847          *ret = 0;
   5848          if (errorV && !errorA) {
   5849             *ret = bad_addrV;
   5850          }
   5851          if (!errorV && errorA) {
   5852             *ret = bad_addrA;
   5853          }
   5854          if (errorV && errorA) {
   5855             *ret = bad_addrV < bad_addrA ? bad_addrV : bad_addrA;
   5856          }
   5857          break;
   5858       }
   5859 
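              /* Hedged client-side sketch for the two requests above
                 (standard memcheck.h macros; 'p', 'n' and 'x' are
                 illustrative names only):

                    VALGRIND_CHECK_MEM_IS_ADDRESSABLE(p, n);
                    VALGRIND_CHECK_MEM_IS_DEFINED(p, n);
                    VALGRIND_CHECK_VALUE_IS_DEFINED(x);  // checks &x, sizeof x

                 Each evaluates to 0 if the check passes, otherwise to an
                 offending address, matching how *ret is set above. */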
   5860       case VG_USERREQ__DO_LEAK_CHECK: {
   5861          LeakCheckParams lcp;
   5862 
   5863          if (arg[1] == 0)
   5864             lcp.mode = LC_Full;
   5865          else if (arg[1] == 1)
   5866             lcp.mode = LC_Summary;
   5867          else {
   5868             VG_(message)(Vg_UserMsg,
   5869                          "Warning: unknown memcheck leak search mode\n");
   5870             lcp.mode = LC_Full;
   5871          }
   5872 
   5873          lcp.show_leak_kinds = MC_(clo_show_leak_kinds);
   5874          lcp.errors_for_leak_kinds = MC_(clo_error_for_leak_kinds);
   5875          lcp.heuristics = MC_(clo_leak_check_heuristics);
   5876 
   5877          if (arg[2] == 0)
   5878             lcp.deltamode = LCD_Any;
   5879          else if (arg[2] == 1)
   5880             lcp.deltamode = LCD_Increased;
   5881          else if (arg[2] == 2)
   5882             lcp.deltamode = LCD_Changed;
   5883          else {
   5884             VG_(message)
   5885                (Vg_UserMsg,
   5886                 "Warning: unknown memcheck leak search deltamode\n");
   5887             lcp.deltamode = LCD_Any;
   5888          }
   5889          lcp.max_loss_records_output = 999999999;
   5890          lcp.requested_by_monitor_command = False;
   5891 
   5892          MC_(detect_memory_leaks)(tid, &lcp);
   5893          *ret = 0; /* return value is meaningless */
   5894          break;
   5895       }
   5896 
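              /* Hedged client-side sketch: the standard memcheck.h
                 leak-check macros encode (arg[1], arg[2]) as decoded above:

                    VALGRIND_DO_LEAK_CHECK;          // full,    any
                    VALGRIND_DO_ADDED_LEAK_CHECK;    // full,    increased
                    VALGRIND_DO_CHANGED_LEAK_CHECK;  // full,    changed
                    VALGRIND_DO_QUICK_LEAK_CHECK;    // summary, any
              */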
   5897       case VG_USERREQ__MAKE_MEM_NOACCESS:
   5898          MC_(make_mem_noaccess) ( arg[1], arg[2] );
   5899          *ret = -1;
   5900          break;
   5901 
   5902       case VG_USERREQ__MAKE_MEM_UNDEFINED:
   5903          make_mem_undefined_w_tid_and_okind ( arg[1], arg[2], tid,
   5904                                               MC_OKIND_USER );
   5905          *ret = -1;
   5906          break;
   5907 
   5908       case VG_USERREQ__MAKE_MEM_DEFINED:
   5909          MC_(make_mem_defined) ( arg[1], arg[2] );
   5910          *ret = -1;
   5911          break;
   5912 
   5913       case VG_USERREQ__MAKE_MEM_DEFINED_IF_ADDRESSABLE:
   5914          make_mem_defined_if_addressable ( arg[1], arg[2] );
   5915          *ret = -1;
   5916          break;
   5917 
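              /* Hedged client-side sketch for the four requests above
                 (standard memcheck.h macros; 'p' and 'n' are illustrative):

                    VALGRIND_MAKE_MEM_NOACCESS(p, n);
                    VALGRIND_MAKE_MEM_UNDEFINED(p, n);
                    VALGRIND_MAKE_MEM_DEFINED(p, n);
                    VALGRIND_MAKE_MEM_DEFINED_IF_ADDRESSABLE(p, n);
              */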
   5918       case VG_USERREQ__CREATE_BLOCK: /* describe a block */
   5919          if (arg[1] != 0 && arg[2] != 0) {
   5920             i = alloc_client_block();
   5921             /* VG_(printf)("allocated %d %p\n", i, cgbs); */
   5922             cgbs[i].start = arg[1];
   5923             cgbs[i].size  = arg[2];
   5924             cgbs[i].desc  = VG_(strdup)("mc.mhcr.1", (HChar *)arg[3]);
   5925             cgbs[i].where = VG_(record_ExeContext) ( tid, 0/*first_ip_delta*/ );
   5926             *ret = i;
   5927          } else
   5928             *ret = -1;
   5929          break;
   5930 
   5931       case VG_USERREQ__DISCARD: /* discard */
   5932          if (cgbs == NULL
   5933              || arg[2] >= cgb_used ||
   5934              (cgbs[arg[2]].start == 0 && cgbs[arg[2]].size == 0)) {
   5935             *ret = 1;
   5936          } else {
   5937             tl_assert(arg[2] >= 0 && arg[2] < cgb_used);
   5938             cgbs[arg[2]].start = cgbs[arg[2]].size = 0;
   5939             VG_(free)(cgbs[arg[2]].desc);
   5940             cgb_discards++;
   5941             *ret = 0;
   5942          }
   5943          break;
   5944 
   5945       case VG_USERREQ__GET_VBITS:
   5946          *ret = mc_get_or_set_vbits_for_client
   5947                    ( arg[1], arg[2], arg[3],
   5948                      False /* get them */,
   5949                      True /* is client request */ );
   5950          break;
   5951 
   5952       case VG_USERREQ__SET_VBITS:
   5953          *ret = mc_get_or_set_vbits_for_client
   5954                    ( arg[1], arg[2], arg[3],
   5955                      True /* set them */,
   5956                      True /* is client request */ );
   5957          break;
   5958 
   5959       case VG_USERREQ__COUNT_LEAKS: { /* count leaked bytes */
   5960          UWord** argp = (UWord**)arg;
   5961          // MC_(bytes_leaked) et al were set by the last leak check (or zero
   5962          // if no prior leak checks performed).
   5963          *argp[1] = MC_(bytes_leaked) + MC_(bytes_indirect);
   5964          *argp[2] = MC_(bytes_dubious);
   5965          *argp[3] = MC_(bytes_reachable);
   5966          *argp[4] = MC_(bytes_suppressed);
   5967          // there is no argp[5]
   5968          //*argp[5] = MC_(bytes_indirect);
   5969          // XXX need to make *argp[1-4] defined;  currently done in the
   5970          // VALGRIND_COUNT_LEAKS_MACRO by initialising them to zero.
   5971          *ret = 0;
   5972          return True;
   5973       }
   5974       case VG_USERREQ__COUNT_LEAK_BLOCKS: { /* count leaked blocks */
   5975          UWord** argp = (UWord**)arg;
   5976          // MC_(blocks_leaked) et al were set by the last leak check (or zero
   5977          // if no prior leak checks performed).
   5978          *argp[1] = MC_(blocks_leaked) + MC_(blocks_indirect);
   5979          *argp[2] = MC_(blocks_dubious);
   5980          *argp[3] = MC_(blocks_reachable);
   5981          *argp[4] = MC_(blocks_suppressed);
   5982          // there is no argp[5]
   5983          //*argp[5] = MC_(blocks_indirect);
   5984          // XXX need to make *argp[1-4] defined;  currently done in the
   5985          // VALGRIND_COUNT_LEAK_BLOCKS_MACRO by initialising them to zero.
   5986          *ret = 0;
   5987          return True;
   5988       }
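              /* Hedged client-side sketch for the two counting requests
                 above (standard memcheck.h macros; per the XXX notes, the
                 macros zero the four output variables before issuing the
                 request):

                    unsigned long leaked, dubious, reachable, suppressed;
                    VALGRIND_COUNT_LEAKS(leaked, dubious, reachable, suppressed);
                    VALGRIND_COUNT_LEAK_BLOCKS(leaked, dubious, reachable, suppressed);
              */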
   5989       case VG_USERREQ__MALLOCLIKE_BLOCK: {
   5990          Addr p         = (Addr)arg[1];
   5991          SizeT sizeB    =       arg[2];
   5992          UInt rzB       =       arg[3];
   5993          Bool is_zeroed = (Bool)arg[4];
   5994 
   5995          MC_(new_block) ( tid, p, sizeB, /*ignored*/0, is_zeroed,
   5996                           MC_AllocCustom, MC_(malloc_list) );
   5997          if (rzB > 0) {
   5998             MC_(make_mem_noaccess) ( p - rzB, rzB);
   5999             MC_(make_mem_noaccess) ( p + sizeB, rzB);
   6000          }
   6001          return True;
   6002       }
   6003       case VG_USERREQ__RESIZEINPLACE_BLOCK: {
   6004          Addr p         = (Addr)arg[1];
   6005          SizeT oldSizeB =       arg[2];
   6006          SizeT newSizeB =       arg[3];
   6007          UInt rzB       =       arg[4];
   6008 
   6009          MC_(handle_resizeInPlace) ( tid, p, oldSizeB, newSizeB, rzB );
   6010          return True;
   6011       }
   6012       case VG_USERREQ__FREELIKE_BLOCK: {
   6013          Addr p         = (Addr)arg[1];
   6014          UInt rzB       =       arg[2];
   6015 
   6016          MC_(handle_free) ( tid, p, rzB, MC_AllocCustom );
   6017          return True;
   6018       }
   6019 
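              /* Hedged sketch of a custom allocator describing its blocks to
                 memcheck via the requests above (standard valgrind.h macros;
                 'lower_level_alloc'/'lower_level_free' and REDZONE are
                 purely illustrative):

                    #define REDZONE 16
                    void* my_alloc ( size_t n ) {
                       char* raw = lower_level_alloc(n + 2*REDZONE);
                       char* p   = raw + REDZONE;
                       VALGRIND_MALLOCLIKE_BLOCK(p, n, REDZONE, 0);
                       return p;
                    }
                    void my_free ( void* p ) {
                       VALGRIND_FREELIKE_BLOCK(p, REDZONE);
                       lower_level_free((char*)p - REDZONE);
                    }

                 VALGRIND_RESIZEINPLACE_BLOCK(p, oldn, newn, REDZONE) covers
                 the resize-in-place case. */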
   6020       case _VG_USERREQ__MEMCHECK_RECORD_OVERLAP_ERROR: {
   6021          HChar* s  = (HChar*)arg[1];
   6022          Addr  dst = (Addr) arg[2];
   6023          Addr  src = (Addr) arg[3];
   6024          SizeT len = (SizeT)arg[4];
   6025          MC_(record_overlap_error)(tid, s, src, dst, len);
   6026          return True;
   6027       }
   6028 
   6029       case VG_USERREQ__CREATE_MEMPOOL: {
   6030          Addr pool      = (Addr)arg[1];
   6031          UInt rzB       =       arg[2];
   6032          Bool is_zeroed = (Bool)arg[3];
   6033 
   6034          MC_(create_mempool) ( pool, rzB, is_zeroed );
   6035          return True;
   6036       }
   6037 
   6038       case VG_USERREQ__DESTROY_MEMPOOL: {
   6039          Addr pool      = (Addr)arg[1];
   6040 
   6041          MC_(destroy_mempool) ( pool );
   6042          return True;
   6043       }
   6044 
   6045       case VG_USERREQ__MEMPOOL_ALLOC: {
   6046          Addr pool      = (Addr)arg[1];
   6047          Addr addr      = (Addr)arg[2];
   6048          UInt size      =       arg[3];
   6049 
   6050          MC_(mempool_alloc) ( tid, pool, addr, size );
   6051          return True;
   6052       }
   6053 
   6054       case VG_USERREQ__MEMPOOL_FREE: {
   6055          Addr pool      = (Addr)arg[1];
   6056          Addr addr      = (Addr)arg[2];
   6057 
   6058          MC_(mempool_free) ( pool, addr );
   6059          return True;
   6060       }
   6061 
   6062       case VG_USERREQ__MEMPOOL_TRIM: {
   6063          Addr pool      = (Addr)arg[1];
   6064          Addr addr      = (Addr)arg[2];
   6065          UInt size      =       arg[3];
   6066 
   6067          MC_(mempool_trim) ( pool, addr, size );
   6068          return True;
   6069       }
   6070 
   6071       case VG_USERREQ__MOVE_MEMPOOL: {
   6072          Addr poolA     = (Addr)arg[1];
   6073          Addr poolB     = (Addr)arg[2];
   6074 
   6075          MC_(move_mempool) ( poolA, poolB );
   6076          return True;
   6077       }
   6078 
   6079       case VG_USERREQ__MEMPOOL_CHANGE: {
   6080          Addr pool      = (Addr)arg[1];
   6081          Addr addrA     = (Addr)arg[2];
   6082          Addr addrB     = (Addr)arg[3];
   6083          UInt size      =       arg[4];
   6084 
   6085          MC_(mempool_change) ( pool, addrA, addrB, size );
   6086          return True;
   6087       }
   6088 
   6089       case VG_USERREQ__MEMPOOL_EXISTS: {
   6090          Addr pool      = (Addr)arg[1];
   6091 
   6092          *ret = (UWord) MC_(mempool_exists) ( pool );
   6093          return True;
   6094       }
   6095 
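              /* Hedged client-side sketch for the pool requests above
                 (standard valgrind.h macros; 'pool', 'addr' and 'size' are
                 illustrative):

                    VALGRIND_CREATE_MEMPOOL(pool, rzB, 0);
                    VALGRIND_MEMPOOL_ALLOC(pool, addr, size);
                    VALGRIND_MEMPOOL_FREE(pool, addr);
                    VALGRIND_MEMPOOL_TRIM(pool, addr, size);
                    VALGRIND_MOVE_MEMPOOL(poolA, poolB);
                    VALGRIND_MEMPOOL_CHANGE(pool, addrA, addrB, size);
                    if (VALGRIND_MEMPOOL_EXISTS(pool)) { ... }
              */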
   6096       case VG_USERREQ__GDB_MONITOR_COMMAND: {
   6097          Bool handled = handle_gdb_monitor_command (tid, (HChar*)arg[1]);
   6098          if (handled)
   6099             *ret = 1;
   6100          else
   6101             *ret = 0;
   6102          return handled;
   6103       }
   6104 
   6105       case VG_USERREQ__DISABLE_ADDR_ERROR_REPORTING_IN_RANGE:
   6106       case VG_USERREQ__ENABLE_ADDR_ERROR_REPORTING_IN_RANGE: {
   6107          Bool addRange
   6108             = arg[0] == VG_USERREQ__DISABLE_ADDR_ERROR_REPORTING_IN_RANGE;
   6109          Bool ok
   6110             = modify_ignore_ranges(addRange, arg[1], arg[2]);
   6111          *ret = ok ? 1 : 0;
   6112          return True;
   6113       }
   6114 
   6115       default:
   6116          VG_(message)(
   6117             Vg_UserMsg,
   6118             "Warning: unknown memcheck client request code %llx\n",
   6119             (ULong)arg[0]
   6120          );
   6121          return False;
   6122    }
   6123    return True;
   6124 }
   6125 
   6126 
   6127 /*------------------------------------------------------------*/
   6128 /*--- Crude profiling machinery.                           ---*/
   6129 /*------------------------------------------------------------*/
   6130 
   6131 // We track a number of interesting events (using PROF_EVENT)
   6132 // if MC_PROFILE_MEMORY is defined.
   6133 
   6134 #ifdef MC_PROFILE_MEMORY
   6135 
   6136 UInt   MC_(event_ctr)[N_PROF_EVENTS];
   6137 HChar* MC_(event_ctr_name)[N_PROF_EVENTS];
   6138 
   6139 static void init_prof_mem ( void )
   6140 {
   6141    Int i;
   6142    for (i = 0; i < N_PROF_EVENTS; i++) {
   6143       MC_(event_ctr)[i] = 0;
   6144       MC_(event_ctr_name)[i] = NULL;
   6145    }
   6146 }
   6147 
   6148 static void done_prof_mem ( void )
   6149 {
   6150    Int  i;
   6151    Bool spaced = False;
   6152    for (i = 0; i < N_PROF_EVENTS; i++) {
   6153       if (!spaced && (i % 10) == 0) {
   6154          VG_(printf)("\n");
   6155          spaced = True;
   6156       }
   6157       if (MC_(event_ctr)[i] > 0) {
   6158          spaced = False;
   6159          VG_(printf)( "prof mem event %3d: %9d   %s\n",
   6160                       i, MC_(event_ctr)[i],
   6161                       MC_(event_ctr_name)[i]
   6162                          ? MC_(event_ctr_name)[i] : "unnamed");
   6163       }
   6164    }
   6165 }
   6166 
   6167 #else
   6168 
   6169 static void init_prof_mem ( void ) { }
   6170 static void done_prof_mem ( void ) { }
   6171 
   6172 #endif
   6173 
   6174 
   6175 /*------------------------------------------------------------*/
   6176 /*--- Origin tracking stuff                                ---*/
   6177 /*------------------------------------------------------------*/
   6178 
   6179 /*--------------------------------------------*/
   6180 /*--- Origin tracking: load handlers       ---*/
   6181 /*--------------------------------------------*/
   6182 
   6183 static INLINE UInt merge_origins ( UInt or1, UInt or2 ) {
   6184    return or1 > or2 ? or1 : or2;
   6185 }
   6186 
   6187 UWord VG_REGPARM(1) MC_(helperc_b_load1)( Addr a ) {
   6188    OCacheLine* line;
   6189    UChar descr;
   6190    UWord lineoff = oc_line_offset(a);
   6191    UWord byteoff = a & 3; /* 0, 1, 2 or 3 */
   6192 
   6193    if (OC_ENABLE_ASSERTIONS) {
   6194       tl_assert(lineoff >= 0 && lineoff < OC_W32S_PER_LINE);
   6195    }
   6196 
   6197    line = find_OCacheLine( a );
   6198 
   6199    descr = line->descr[lineoff];
   6200    if (OC_ENABLE_ASSERTIONS) {
   6201       tl_assert(descr < 0x10);
   6202    }
   6203 
   6204    if (LIKELY(0 == (descr & (1 << byteoff))))  {
   6205       return 0;
   6206    } else {
   6207       return line->w32[lineoff];
   6208    }
   6209 }
   6210 
   6211 UWord VG_REGPARM(1) MC_(helperc_b_load2)( Addr a ) {
   6212    OCacheLine* line;
   6213    UChar descr;
   6214    UWord lineoff, byteoff;
   6215 
   6216    if (UNLIKELY(a & 1)) {
   6217       /* Handle misaligned case, slowly. */
   6218       UInt oLo   = (UInt)MC_(helperc_b_load1)( a + 0 );
   6219       UInt oHi   = (UInt)MC_(helperc_b_load1)( a + 1 );
   6220       return merge_origins(oLo, oHi);
   6221    }
   6222 
   6223    lineoff = oc_line_offset(a);
   6224    byteoff = a & 3; /* 0 or 2 */
   6225 
   6226    if (OC_ENABLE_ASSERTIONS) {
   6227       tl_assert(lineoff >= 0 && lineoff < OC_W32S_PER_LINE);
   6228    }
   6229    line = find_OCacheLine( a );
   6230 
   6231    descr = line->descr[lineoff];
   6232    if (OC_ENABLE_ASSERTIONS) {
   6233       tl_assert(descr < 0x10);
   6234    }
   6235 
   6236    if (LIKELY(0 == (descr & (3 << byteoff)))) {
   6237       return 0;
   6238    } else {
   6239       return line->w32[lineoff];
   6240    }
   6241 }
   6242 
   6243 UWord VG_REGPARM(1) MC_(helperc_b_load4)( Addr a ) {
   6244    OCacheLine* line;
   6245    UChar descr;
   6246    UWord lineoff;
   6247 
   6248    if (UNLIKELY(a & 3)) {
   6249       /* Handle misaligned case, slowly. */
   6250       UInt oLo   = (UInt)MC_(helperc_b_load2)( a + 0 );
   6251       UInt oHi   = (UInt)MC_(helperc_b_load2)( a + 2 );
   6252       return merge_origins(oLo, oHi);
   6253    }
   6254 
   6255    lineoff = oc_line_offset(a);
   6256    if (OC_ENABLE_ASSERTIONS) {
   6257       tl_assert(lineoff >= 0 && lineoff < OC_W32S_PER_LINE);
   6258    }
   6259 
   6260    line = find_OCacheLine( a );
   6261 
   6262    descr = line->descr[lineoff];
   6263    if (OC_ENABLE_ASSERTIONS) {
   6264       tl_assert(descr < 0x10);
   6265    }
   6266 
   6267    if (LIKELY(0 == descr)) {
   6268       return 0;
   6269    } else {
   6270       return line->w32[lineoff];
   6271    }
   6272 }
   6273 
   6274 UWord VG_REGPARM(1) MC_(helperc_b_load8)( Addr a ) {
   6275    OCacheLine* line;
   6276    UChar descrLo, descrHi, descr;
   6277    UWord lineoff;
   6278 
   6279    if (UNLIKELY(a & 7)) {
   6280       /* Handle misaligned case, slowly. */
   6281       UInt oLo   = (UInt)MC_(helperc_b_load4)( a + 0 );
   6282       UInt oHi   = (UInt)MC_(helperc_b_load4)( a + 4 );
   6283       return merge_origins(oLo, oHi);
   6284    }
   6285 
   6286    lineoff = oc_line_offset(a);
   6287    if (OC_ENABLE_ASSERTIONS) {
   6288       tl_assert(lineoff == (lineoff & 6)); /*0,2,4,6*//*since 8-aligned*/
   6289    }
   6290 
   6291    line = find_OCacheLine( a );
   6292 
   6293    descrLo = line->descr[lineoff + 0];
   6294    descrHi = line->descr[lineoff + 1];
   6295    descr   = descrLo | descrHi;
   6296    if (OC_ENABLE_ASSERTIONS) {
   6297       tl_assert(descr < 0x10);
   6298    }
   6299 
   6300    if (LIKELY(0 == descr)) {
   6301       return 0; /* both 32-bit chunks are defined */
   6302    } else {
   6303       UInt oLo = descrLo == 0 ? 0 : line->w32[lineoff + 0];
   6304       UInt oHi = descrHi == 0 ? 0 : line->w32[lineoff + 1];
   6305       return merge_origins(oLo, oHi);
   6306    }
   6307 }
   6308 
   6309 UWord VG_REGPARM(1) MC_(helperc_b_load16)( Addr a ) {
   6310    UInt oLo   = (UInt)MC_(helperc_b_load8)( a + 0 );
   6311    UInt oHi   = (UInt)MC_(helperc_b_load8)( a + 8 );
   6312    UInt oBoth = merge_origins(oLo, oHi);
   6313    return (UWord)oBoth;
   6314 }
   6315 
   6316 UWord VG_REGPARM(1) MC_(helperc_b_load32)( Addr a ) {
   6317    UInt oQ0   = (UInt)MC_(helperc_b_load8)( a + 0 );
   6318    UInt oQ1   = (UInt)MC_(helperc_b_load8)( a + 8 );
   6319    UInt oQ2   = (UInt)MC_(helperc_b_load8)( a + 16 );
   6320    UInt oQ3   = (UInt)MC_(helperc_b_load8)( a + 24 );
   6321    UInt oAll  = merge_origins(merge_origins(oQ0, oQ1),
   6322                               merge_origins(oQ2, oQ3));
   6323    return (UWord)oAll;
   6324 }
   6325 
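        /* All of the load handlers above share the same shape: a misaligned
           access is split into two naturally aligned halves whose origins
           are merged.  For instance (a sketch), a 4-byte load at 0x1002
           becomes
              merge_origins( MC_(helperc_b_load2)(0x1002),
                             MC_(helperc_b_load2)(0x1004) )
           and each 2-byte half then takes the aligned fast path. */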
   6326 
   6327 /*--------------------------------------------*/
   6328 /*--- Origin tracking: store handlers      ---*/
   6329 /*--------------------------------------------*/
   6330 
   6331 void VG_REGPARM(2) MC_(helperc_b_store1)( Addr a, UWord d32 ) {
   6332    OCacheLine* line;
   6333    UWord lineoff = oc_line_offset(a);
   6334    UWord byteoff = a & 3; /* 0, 1, 2 or 3 */
   6335 
   6336    if (OC_ENABLE_ASSERTIONS) {
   6337       tl_assert(lineoff >= 0 && lineoff < OC_W32S_PER_LINE);
   6338    }
   6339 
   6340    line = find_OCacheLine( a );
   6341 
   6342    if (d32 == 0) {
   6343       line->descr[lineoff] &= ~(1 << byteoff);
   6344    } else {
   6345       line->descr[lineoff] |= (1 << byteoff);
   6346       line->w32[lineoff] = d32;
   6347    }
   6348 }
   6349 
   6350 void VG_REGPARM(2) MC_(helperc_b_store2)( Addr a, UWord d32 ) {
   6351    OCacheLine* line;
   6352    UWord lineoff, byteoff;
   6353 
   6354    if (UNLIKELY(a & 1)) {
   6355       /* Handle misaligned case, slowly. */
   6356       MC_(helperc_b_store1)( a + 0, d32 );
   6357       MC_(helperc_b_store1)( a + 1, d32 );
   6358       return;
   6359    }
   6360 
   6361    lineoff = oc_line_offset(a);
   6362    byteoff = a & 3; /* 0 or 2 */
   6363 
   6364    if (OC_ENABLE_ASSERTIONS) {
   6365       tl_assert(lineoff >= 0 && lineoff < OC_W32S_PER_LINE);
   6366    }
   6367 
   6368    line = find_OCacheLine( a );
   6369 
   6370    if (d32 == 0) {
   6371       line->descr[lineoff] &= ~(3 << byteoff);
   6372    } else {
   6373       line->descr[lineoff] |= (3 << byteoff);
   6374       line->w32[lineoff] = d32;
   6375    }
   6376 }
   6377 
   6378 void VG_REGPARM(2) MC_(helperc_b_store4)( Addr a, UWord d32 ) {
   6379    OCacheLine* line;
   6380    UWord lineoff;
   6381 
   6382    if (UNLIKELY(a & 3)) {
   6383       /* Handle misaligned case, slowly. */
   6384       MC_(helperc_b_store2)( a + 0, d32 );
   6385       MC_(helperc_b_store2)( a + 2, d32 );
   6386       return;
   6387    }
   6388 
   6389    lineoff = oc_line_offset(a);
   6390    if (OC_ENABLE_ASSERTIONS) {
   6391       tl_assert(lineoff >= 0 && lineoff < OC_W32S_PER_LINE);
   6392    }
   6393 
   6394    line = find_OCacheLine( a );
   6395 
   6396    if (d32 == 0) {
   6397       line->descr[lineoff] = 0;
   6398    } else {
   6399       line->descr[lineoff] = 0xF;
   6400       line->w32[lineoff] = d32;
   6401    }
   6402 }
   6403 
   6404 void VG_REGPARM(2) MC_(helperc_b_store8)( Addr a, UWord d32 ) {
   6405    OCacheLine* line;
   6406    UWord lineoff;
   6407 
   6408    if (UNLIKELY(a & 7)) {
   6409       /* Handle misaligned case, slowly. */
   6410       MC_(helperc_b_store4)( a + 0, d32 );
   6411       MC_(helperc_b_store4)( a + 4, d32 );
   6412       return;
   6413    }
   6414 
   6415    lineoff = oc_line_offset(a);
   6416    if (OC_ENABLE_ASSERTIONS) {
   6417       tl_assert(lineoff == (lineoff & 6)); /* 0, 2, 4 or 6, since 8-aligned */
   6418    }
   6419 
   6420    line = find_OCacheLine( a );
   6421 
   6422    if (d32 == 0) {
   6423       line->descr[lineoff + 0] = 0;
   6424       line->descr[lineoff + 1] = 0;
   6425    } else {
   6426       line->descr[lineoff + 0] = 0xF;
   6427       line->descr[lineoff + 1] = 0xF;
   6428       line->w32[lineoff + 0] = d32;
   6429       line->w32[lineoff + 1] = d32;
   6430    }
   6431 }
   6432 
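        /* As with the loads, wider stores are decomposed into 8-byte
           sub-stores. */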
   6433 void VG_REGPARM(2) MC_(helperc_b_store16)( Addr a, UWord d32 ) {
   6434    MC_(helperc_b_store8)( a + 0, d32 );
   6435    MC_(helperc_b_store8)( a + 8, d32 );
   6436 }
   6437 
   6438 void VG_REGPARM(2) MC_(helperc_b_store32)( Addr a, UWord d32 ) {
   6439    MC_(helperc_b_store8)( a +  0, d32 );
   6440    MC_(helperc_b_store8)( a +  8, d32 );
   6441    MC_(helperc_b_store8)( a + 16, d32 );
   6442    MC_(helperc_b_store8)( a + 24, d32 );
   6443 }
   6444 
   6445 
   6446 /*--------------------------------------------*/
   6447 /*--- Origin tracking: sarp handlers       ---*/
   6448 /*--------------------------------------------*/
   6449 
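        /* Set or clear origin tags over an arbitrary (address, length) range
           by peeling off leading 1- and 2-byte pieces until the address is
           4-aligned, doing 4-byte stores for the bulk, and finishing with
           trailing 2- and 1-byte pieces. */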
   6450 __attribute__((noinline))
   6451 static void ocache_sarp_Set_Origins ( Addr a, UWord len, UInt otag ) {
   6452    if ((a & 1) && len >= 1) {
   6453       MC_(helperc_b_store1)( a, otag );
   6454       a++;
   6455       len--;
   6456    }
   6457    if ((a & 2) && len >= 2) {
   6458       MC_(helperc_b_store2)( a, otag );
   6459       a += 2;
   6460       len -= 2;
   6461    }
   6462    if (len >= 4)
   6463       tl_assert(0 == (a & 3));
   6464    while (len >= 4) {
   6465       MC_(helperc_b_store4)( a, otag );
   6466       a += 4;
   6467       len -= 4;
   6468    }
   6469    if (len >= 2) {
   6470       MC_(helperc_b_store2)( a, otag );
   6471       a += 2;
   6472       len -= 2;
   6473    }
   6474    if (len >= 1) {
   6475       MC_(helperc_b_store1)( a, otag );
   6476       //a++;
   6477       len--;
   6478    }
   6479    tl_assert(len == 0);
   6480 }
   6481 
   6482 __attribute__((noinline))
   6483 static void ocache_sarp_Clear_Origins ( Addr a, UWord len ) {
   6484    if ((a & 1) && len >= 1) {
   6485       MC_(helperc_b_store1)( a, 0 );
   6486       a++;
   6487       len--;
   6488    }
   6489    if ((a & 2) && len >= 2) {
   6490       MC_(helperc_b_store2)( a, 0 );
   6491       a += 2;
   6492       len -= 2;
   6493    }
   6494    if (len >= 4)
   6495       tl_assert(0 == (a & 3));
   6496    while (len >= 4) {
   6497       MC_(helperc_b_store4)( a, 0 );
   6498       a += 4;
   6499       len -= 4;
   6500    }
   6501    if (len >= 2) {
   6502       MC_(helperc_b_store2)( a, 0 );
   6503       a += 2;
   6504       len -= 2;
   6505    }
   6506    if (len >= 1) {
   6507       MC_(helperc_b_store1)( a, 0 );
   6508       //a++;
   6509       len--;
   6510    }
   6511    tl_assert(len == 0);
   6512 }
   6513 
   6514 
   6515 /*------------------------------------------------------------*/
   6516 /*--- Setup and finalisation                               ---*/
   6517 /*------------------------------------------------------------*/
   6518 
   6519 static void mc_post_clo_init ( void )
   6520 {
   6521    /* If we've been asked to emit XML, mash around various other
   6522       options so as to constrain the output somewhat. */
   6523    if (VG_(clo_xml)) {
   6524       /* Extract as much info as possible from the leak checker. */
   6525       MC_(clo_leak_check) = LC_Full;
   6526    }
   6527 
   6528    if (MC_(clo_freelist_big_blocks) >= MC_(clo_freelist_vol))
   6529       VG_(message)(Vg_UserMsg,
   6530                    "Warning: --freelist-big-blocks value %lld has no effect\n"
   6531                    "as it is >= the --freelist-vol value %lld\n",
   6532                    MC_(clo_freelist_big_blocks),
   6533                    MC_(clo_freelist_vol));
   6534 
   6535    tl_assert( MC_(clo_mc_level) >= 1 && MC_(clo_mc_level) <= 3 );
   6536 
   6537    if (MC_(clo_mc_level) == 3) {
   6538       /* We're doing origin tracking. */
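              /* The _w_ECU variants are additionally passed an ECU
                 (ExeContext unique) identifying where the stack grew, so
                 that freshly exposed stack memory can be given an origin. */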
   6539 #     ifdef PERF_FAST_STACK
   6540       VG_(track_new_mem_stack_4_w_ECU)   ( mc_new_mem_stack_4_w_ECU   );
   6541       VG_(track_new_mem_stack_8_w_ECU)   ( mc_new_mem_stack_8_w_ECU   );
   6542       VG_(track_new_mem_stack_12_w_ECU)  ( mc_new_mem_stack_12_w_ECU  );
   6543       VG_(track_new_mem_stack_16_w_ECU)  ( mc_new_mem_stack_16_w_ECU  );
   6544       VG_(track_new_mem_stack_32_w_ECU)  ( mc_new_mem_stack_32_w_ECU  );
   6545       VG_(track_new_mem_stack_112_w_ECU) ( mc_new_mem_stack_112_w_ECU );
   6546       VG_(track_new_mem_stack_128_w_ECU) ( mc_new_mem_stack_128_w_ECU );
   6547       VG_(track_new_mem_stack_144_w_ECU) ( mc_new_mem_stack_144_w_ECU );
   6548       VG_(track_new_mem_stack_160_w_ECU) ( mc_new_mem_stack_160_w_ECU );
   6549 #     endif
   6550       VG_(track_new_mem_stack_w_ECU)     ( mc_new_mem_stack_w_ECU     );
   6551       VG_(track_new_mem_stack_signal)    ( mc_new_mem_w_tid_make_ECU );
   6552    } else {
   6553       /* Not doing origin tracking */
   6554 #     ifdef PERF_FAST_STACK
   6555       VG_(track_new_mem_stack_4)   ( mc_new_mem_stack_4   );
   6556       VG_(track_new_mem_stack_8)   ( mc_new_mem_stack_8   );
   6557       VG_(track_new_mem_stack_12)  ( mc_new_mem_stack_12  );
   6558       VG_(track_new_mem_stack_16)  ( mc_new_mem_stack_16  );
   6559       VG_(track_new_mem_stack_32)  ( mc_new_mem_stack_32  );
   6560       VG_(track_new_mem_stack_112) ( mc_new_mem_stack_112 );
   6561       VG_(track_new_mem_stack_128) ( mc_new_mem_stack_128 );
   6562       VG_(track_new_mem_stack_144) ( mc_new_mem_stack_144 );
   6563       VG_(track_new_mem_stack_160) ( mc_new_mem_stack_160 );
   6564 #     endif
   6565       VG_(track_new_mem_stack)     ( mc_new_mem_stack     );
   6566       VG_(track_new_mem_stack_signal) ( mc_new_mem_w_tid_no_ECU );
   6567    }
   6568 
   6569    // We assume that brk()/sbrk() does not initialise new memory.  Is this
   6570    // accurate?  John Reiser says:
   6571    //
   6572    //   "0) sbrk() can *decrease* process address space.  No zero fill is done
   6573    //   for a decrease, not even the fragment on the high end of the last page
   6574    //   that is beyond the new highest address.  For maximum safety and
   6575    //   portability, then the bytes in the last page that reside above [the
   6576    //   new] sbrk(0) should be considered to be uninitialized, but in practice
   6577    //   it is exceedingly likely that they will retain their previous
   6578    //   contents.
   6579    //
   6580    //   1) If an increase is large enough to require new whole pages, then
   6581    //   those new whole pages (like all new pages) are zero-filled by the
   6582    //   operating system.  So if sbrk(0) already is page aligned, then
   6583    //   sbrk(PAGE_SIZE) *does* zero-fill the new memory.
   6584    //
   6585    //   2) Any increase that lies within an existing allocated page is not
   6586    //   changed.  So if (x = sbrk(0)) is not page aligned, then
   6587    //   sbrk(PAGE_SIZE) yields ((PAGE_SIZE -1) & -x) bytes which keep their
   6588    //   existing contents, and an additional PAGE_SIZE bytes which are zeroed.
   6589    //   ((PAGE_SIZE -1) & x) of them are "covered" by the sbrk(), and the rest
   6590    //   of them come along for the ride because the operating system deals
   6591    //   only in whole pages.  Again, for maximum safety and portability, then
   6592    //   anything that lives above [the new] sbrk(0) should be considered
   6593    //   uninitialized, but in practice will retain previous contents [zero in
   6594    //   this case.]"
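           //
           //   Illustrative example of point 2 (numbers invented for clarity):
           //   with PAGE_SIZE = 0x1000 and x = sbrk(0) = 0x10F00, calling
           //   sbrk(PAGE_SIZE) moves the break to 0x11F00.  The 0x100 bytes at
           //   0x10F00..0x10FFF lie in an already-mapped page and keep their
           //   contents; the OS maps one whole new zero-filled page covering
           //   0x11000..0x11FFF, of which 0xF00 bytes are "covered" by the
           //   sbrk and the remaining 0x100 come along for the ride.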
   6595    //
   6596    // In short:
   6597    //
   6598    //   A key property of sbrk/brk is that new whole pages that are supplied
   6599    //   by the operating system *do* get initialized to zero.
   6600    //
   6601    // As for the portability of all this:
   6602    //
   6603    //   sbrk and brk are not POSIX.  However, any system that is a derivative
   6604    //   of *nix has sbrk and brk because too much software (such as the
   6605    //   Bourne shell) relies on the traditional memory map (.text,
   6606    //   .data+.bss, stack) and the existence of sbrk/brk.
   6607    //
   6608    // So we should arguably observe all this.  However:
   6609    // - The current inaccuracy has caused maybe one complaint in seven years(?)
   6610    // - Relying on the zeroed-ness of whole brk'd pages is pretty grotty... I
   6611    //   doubt most programmers know the above information.
   6612    // So I'm not terribly unhappy with marking it as undefined. --njn.
   6613    //
   6614    // [More:  I think most of what John said only applies to sbrk().  It seems
   6615    // that brk() always deals in whole pages.  And since this event deals
   6616    // directly with brk(), not with sbrk(), perhaps it would be reasonable to
   6617    // just mark all memory it allocates as defined.]
   6618    //
   6619    if (MC_(clo_mc_level) == 3)
   6620       VG_(track_new_mem_brk)         ( mc_new_mem_w_tid_make_ECU );
   6621    else
   6622       VG_(track_new_mem_brk)         ( mc_new_mem_w_tid_no_ECU );
   6623 
   6624    /* This origin tracking cache is huge (~100M), so only initialise
   6625       it if we need it. */
   6626    if (MC_(clo_mc_level) >= 3) {
   6627       init_OCache();
   6628       tl_assert(ocacheL1 != NULL);
   6629       tl_assert(ocacheL2 != NULL);
   6630    } else {
   6631       tl_assert(ocacheL1 == NULL);
   6632       tl_assert(ocacheL2 == NULL);
   6633    }
   6634 
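           /* Pool allocator for MC_Chunks.  Each element is an MC_Chunk
              followed by space for its 'where' ExeContext pointers; the
              number of those depends on command-line settings, which is why
              this cannot be set up in mc_pre_clo_init. */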
   6635    MC_(chunk_poolalloc) = VG_(newPA)
   6636       (sizeof(MC_Chunk) + MC_(n_where_pointers)() * sizeof(ExeContext*),
   6637        1000,
   6638        VG_(malloc),
   6639        "mc.cMC.1 (MC_Chunk pools)",
   6640        VG_(free));
   6641 
   6642    /* Do not check definedness of guest state if --undef-value-errors=no */
   6643    if (MC_(clo_mc_level) >= 2)
   6644       VG_(track_pre_reg_read) ( mc_pre_reg_read );
   6645 }
   6646 
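        /* Report a secondary-map count along with the space those SecMaps
           occupy, in KB and MB. */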
   6647 static void print_SM_info(const HChar* type, Int n_SMs)
   6648 {
   6649    VG_(message)(Vg_DebugMsg,
   6650       " memcheck: SMs: %s = %d (%ldk, %ldM)\n",
   6651       type,
   6652       n_SMs,
   6653       n_SMs * sizeof(SecMap) / 1024UL,
   6654       n_SMs * sizeof(SecMap) / (1024 * 1024UL) );
   6655 }
   6656 
   6657 static void mc_print_stats (void)
   6658 {
   6659    SizeT max_secVBit_szB, max_SMs_szB, max_shmem_szB;
   6660 
   6661    VG_(message)(Vg_DebugMsg, " memcheck: freelist: vol %lld length %lld\n",
   6662                 VG_(free_queue_volume), VG_(free_queue_length));
   6663    VG_(message)(Vg_DebugMsg,
   6664       " memcheck: sanity checks: %d cheap, %d expensive\n",
   6665       n_sanity_cheap, n_sanity_expensive );
   6666    VG_(message)(Vg_DebugMsg,
   6667       " memcheck: auxmaps: %lld auxmap entries (%lldk, %lldM) in use\n",
   6668       n_auxmap_L2_nodes,
   6669       n_auxmap_L2_nodes * 64,
   6670       n_auxmap_L2_nodes / 16 );
   6671    VG_(message)(Vg_DebugMsg,
   6672       " memcheck: auxmaps_L1: %lld searches, %lld cmps, ratio %lld:10\n",
   6673       n_auxmap_L1_searches, n_auxmap_L1_cmps,
   6674       (10ULL * n_auxmap_L1_cmps)
   6675          / (n_auxmap_L1_searches ? n_auxmap_L1_searches : 1)
   6676    );
   6677    VG_(message)(Vg_DebugMsg,
   6678       " memcheck: auxmaps_L2: %lld searches, %lld nodes\n",
   6679       n_auxmap_L2_searches, n_auxmap_L2_nodes
   6680    );
   6681 
   6682    print_SM_info("n_issued     ", n_issued_SMs);
   6683    print_SM_info("n_deissued   ", n_deissued_SMs);
   6684    print_SM_info("max_noaccess ", max_noaccess_SMs);
   6685    print_SM_info("max_undefined", max_undefined_SMs);
   6686    print_SM_info("max_defined  ", max_defined_SMs);
   6687    print_SM_info("max_non_DSM  ", max_non_DSM_SMs);
   6688 
   6689    // Three DSMs, plus the non-DSM ones
   6690    max_SMs_szB = (3 + max_non_DSM_SMs) * sizeof(SecMap);
   6691    // The 3*sizeof(Word) term is the AVL node metadata size.
   6692    // The VG_ROUNDUP is because the OSet pool allocator will/must align
   6693    // the elements on pointer size.
   6694    // Note that the pool allocator has some additional small overhead
   6695    // which is not counted below.
   6696    // Hardwiring this logic sucks, but I don't see how else to do it.
   6697    max_secVBit_szB = max_secVBit_nodes *
   6698          (3*sizeof(Word) + VG_ROUNDUP(sizeof(SecVBitNode), sizeof(void*)));
   6699    max_shmem_szB   = sizeof(primary_map) + max_SMs_szB + max_secVBit_szB;
   6700 
   6701    VG_(message)(Vg_DebugMsg,
   6702       " memcheck: max sec V bit nodes:    %d (%ldk, %ldM)\n",
   6703       max_secVBit_nodes, max_secVBit_szB / 1024,
   6704                          max_secVBit_szB / (1024 * 1024));
   6705    VG_(message)(Vg_DebugMsg,
   6706       " memcheck: set_sec_vbits8 calls: %llu (new: %llu, updates: %llu)\n",
   6707       sec_vbits_new_nodes + sec_vbits_updates,
   6708       sec_vbits_new_nodes, sec_vbits_updates );
   6709    VG_(message)(Vg_DebugMsg,
   6710       " memcheck: max shadow mem size:   %ldk, %ldM\n",
   6711       max_shmem_szB / 1024, max_shmem_szB / (1024 * 1024));
   6712 
   6713    if (MC_(clo_mc_level) >= 3) {
   6714       VG_(message)(Vg_DebugMsg,
   6715                    " ocacheL1: %'12lu refs   %'12lu misses (%'lu lossage)\n",
   6716                    stats_ocacheL1_find,
   6717                    stats_ocacheL1_misses,
   6718                    stats_ocacheL1_lossage );
   6719       VG_(message)(Vg_DebugMsg,
   6720                    " ocacheL1: %'12lu at 0   %'12lu at 1\n",
   6721                    stats_ocacheL1_find - stats_ocacheL1_misses
   6722                       - stats_ocacheL1_found_at_1
   6723                       - stats_ocacheL1_found_at_N,
   6724                    stats_ocacheL1_found_at_1 );
   6725       VG_(message)(Vg_DebugMsg,
   6726                    " ocacheL1: %'12lu at 2+  %'12lu move-fwds\n",
   6727                    stats_ocacheL1_found_at_N,
   6728                    stats_ocacheL1_movefwds );
   6729       VG_(message)(Vg_DebugMsg,
   6730                    " ocacheL1: %'12lu sizeB  %'12u useful\n",
   6731                    (UWord)sizeof(OCache),
   6732                    4 * OC_W32S_PER_LINE * OC_LINES_PER_SET * OC_N_SETS );
   6733       VG_(message)(Vg_DebugMsg,
   6734                    " ocacheL2: %'12lu refs   %'12lu misses\n",
   6735                    stats__ocacheL2_refs,
   6736                    stats__ocacheL2_misses );
   6737       VG_(message)(Vg_DebugMsg,
   6738                    " ocacheL2:    %'9lu max nodes %'9lu curr nodes\n",
   6739                    stats__ocacheL2_n_nodes_max,
   6740                    stats__ocacheL2_n_nodes );
   6741       VG_(message)(Vg_DebugMsg,
   6742                    " niacache: %'12lu refs   %'12lu misses\n",
   6743                    stats__nia_cache_queries, stats__nia_cache_misses);
   6744    } else {
   6745       tl_assert(ocacheL1 == NULL);
   6746       tl_assert(ocacheL2 == NULL);
   6747    }
   6748 }
   6749 
   6750 
   6751 static void mc_fini ( Int exitcode )
   6752 {
   6753    MC_(print_malloc_stats)();
   6754 
   6755    if (MC_(clo_leak_check) != LC_Off) {
   6756       LeakCheckParams lcp;
   6757       lcp.mode = MC_(clo_leak_check);
   6758       lcp.show_leak_kinds = MC_(clo_show_leak_kinds);
   6759       lcp.heuristics = MC_(clo_leak_check_heuristics);
   6760       lcp.errors_for_leak_kinds = MC_(clo_error_for_leak_kinds);
   6761       lcp.deltamode = LCD_Any;
   6762       lcp.max_loss_records_output = 999999999;
   6763       lcp.requested_by_monitor_command = False;
   6764       MC_(detect_memory_leaks)(1/*bogus ThreadId*/, &lcp);
   6765    } else {
   6766       if (VG_(clo_verbosity) == 1 && !VG_(clo_xml)) {
   6767          VG_(umsg)(
   6768             "For a detailed leak analysis, rerun with: --leak-check=full\n"
   6769             "\n"
   6770          );
   6771       }
   6772    }
   6773 
   6774    if (VG_(clo_verbosity) == 1 && !VG_(clo_xml)) {
   6775       VG_(message)(Vg_UserMsg,
   6776                    "For counts of detected and suppressed errors, rerun with: -v\n");
   6777    }
   6778 
   6779    if (MC_(any_value_errors) && !VG_(clo_xml) && VG_(clo_verbosity) >= 1
   6780        && MC_(clo_mc_level) == 2) {
   6781       VG_(message)(Vg_UserMsg,
   6782                    "Use --track-origins=yes to see where "
   6783                    "uninitialised values come from\n");
   6784    }
   6785 
   6786    /* Print a warning if any client-request-generated ignore-ranges
   6787       still exist.  It would be reasonable to expect that a properly
   6788       written program would remove any such ranges before exiting, and
   6789       since they are a bit on the dangerous side, it's worth flagging
   6790       them here.  By contrast, ranges specified on the command line
   6791       normally pertain to hardware mapped into the address space, and
   6792       so we can't expect the client to have got rid of them. */
   6793    if (gIgnoredAddressRanges) {
   6794       Word i, nBad = 0;
   6795       for (i = 0; i < VG_(sizeRangeMap)(gIgnoredAddressRanges); i++) {
   6796          UWord val     = IAR_INVALID;
   6797          UWord key_min = ~(UWord)0;
   6798          UWord key_max = (UWord)0;
   6799          VG_(indexRangeMap)( &key_min, &key_max, &val,
   6800                              gIgnoredAddressRanges, i );
   6801          if (val != IAR_ClientReq)
   6802            continue;
   6803          /* Print the offending range.  Also, if it is the first,
   6804             print a banner before it. */
   6805          nBad++;
   6806          if (nBad == 1) {
   6807             VG_(umsg)(
   6808               "WARNING: exiting program has the following client-requested\n"
   6809               "WARNING: address error disablement range(s) still in force,\n"
   6810               "WARNING: "
   6811                  "possibly as a result of some mistake in the use of the\n"
   6812               "WARNING: "
   6813                  "VALGRIND_{DISABLE,ENABLE}_ERROR_REPORTING_IN_RANGE macros.\n"
   6814             );
   6815          }
   6816          VG_(umsg)("   [%ld]  0x%016llx-0x%016llx  %s\n",
   6817                    i, (ULong)key_min, (ULong)key_max, showIARKind(val));
   6818       }
   6819    }
   6820 
   6821    done_prof_mem();
   6822 
   6823    if (VG_(clo_stats))
   6824       mc_print_stats();
   6825 
   6826    if (0) {
   6827       VG_(message)(Vg_DebugMsg,
   6828         "------ Valgrind's client block stats follow ---------------\n" );
   6829       show_client_block_stats();
   6830    }
   6831 }
   6832 
   6833 /* Mark the given addr/len range as unaddressable, for the watchpoint
   6834    implementation.  The PointKind will be handled at access time. */
   6835 static Bool mc_mark_unaddressable_for_watchpoint (PointKind kind, Bool insert,
   6836                                                   Addr addr, SizeT len)
   6837 {
   6838    /* GDBTD: this is somewhat fishy.  We should arguably save the previous
   6839       accessibility and definedness in gdbserver so that they can be
   6840       restored properly.  Currently we assume that the user only watches
   6841       things which are properly addressable and defined. */
   6842    if (insert)
   6843       MC_(make_mem_noaccess) (addr, len);
   6844    else
   6845       MC_(make_mem_defined)  (addr, len);
   6846    return True;
   6847 }
   6848 
   6849 static void mc_pre_clo_init(void)
   6850 {
   6851    VG_(details_name)            ("Memcheck");
   6852    VG_(details_version)         (NULL);
   6853    VG_(details_description)     ("a memory error detector");
   6854    VG_(details_copyright_author)(
   6855       "Copyright (C) 2002-2013, and GNU GPL'd, by Julian Seward et al.");
   6856    VG_(details_bug_reports_to)  (VG_BUGS_TO);
   6857    VG_(details_avg_translation_sizeB) ( 640 );
   6858 
   6859    VG_(basic_tool_funcs)          (mc_post_clo_init,
   6860                                    MC_(instrument),
   6861                                    mc_fini);
   6862 
   6863    VG_(needs_final_IR_tidy_pass)  ( MC_(final_tidy) );
   6864 
   6865 
   6866    VG_(needs_core_errors)         ();
   6867    VG_(needs_tool_errors)         (MC_(eq_Error),
   6868                                    MC_(before_pp_Error),
   6869                                    MC_(pp_Error),
   6870                                    True,/*show TIDs for errors*/
   6871                                    MC_(update_Error_extra),
   6872                                    MC_(is_recognised_suppression),
   6873                                    MC_(read_extra_suppression_info),
   6874                                    MC_(error_matches_suppression),
   6875                                    MC_(get_error_name),
   6876                                    MC_(get_extra_suppression_info),
   6877                                    MC_(print_extra_suppression_use),
   6878                                    MC_(update_extra_suppression_use));
   6879    VG_(needs_libc_freeres)        ();
   6880    VG_(needs_command_line_options)(mc_process_cmd_line_options,
   6881                                    mc_print_usage,
   6882                                    mc_print_debug_usage);
   6883    VG_(needs_client_requests)     (mc_handle_client_request);
   6884    VG_(needs_sanity_checks)       (mc_cheap_sanity_check,
   6885                                    mc_expensive_sanity_check);
   6886    VG_(needs_print_stats)         (mc_print_stats);
   6887    VG_(needs_info_location)       (MC_(pp_describe_addr));
   6888    VG_(needs_malloc_replacement)  (MC_(malloc),
   6889                                    MC_(__builtin_new),
   6890                                    MC_(__builtin_vec_new),
   6891                                    MC_(memalign),
   6892                                    MC_(calloc),
   6893                                    MC_(free),
   6894                                    MC_(__builtin_delete),
   6895                                    MC_(__builtin_vec_delete),
   6896                                    MC_(realloc),
   6897                                    MC_(malloc_usable_size),
   6898                                    MC_MALLOC_DEFAULT_REDZONE_SZB );
   6899    MC_(Malloc_Redzone_SzB) = VG_(malloc_effective_client_redzone_size)();
   6900 
   6901    VG_(needs_xml_output)          ();
   6902 
   6903    VG_(track_new_mem_startup)     ( mc_new_mem_startup );
   6904 
   6905    // Handling of mmap and mprotect isn't simple (well, it is simple,
   6906    // but the justification isn't).  See comments above, just prior to
   6907    // mc_new_mem_mmap.
   6908    VG_(track_new_mem_mmap)        ( mc_new_mem_mmap );
   6909    VG_(track_change_mem_mprotect) ( mc_new_mem_mprotect );
   6910 
   6911    VG_(track_copy_mem_remap)      ( MC_(copy_address_range_state) );
   6912 
   6913    VG_(track_die_mem_stack_signal)( MC_(make_mem_noaccess) );
   6914    VG_(track_die_mem_brk)         ( MC_(make_mem_noaccess) );
   6915    VG_(track_die_mem_munmap)      ( MC_(make_mem_noaccess) );
   6916 
   6917    /* Defer the specification of the new_mem_stack functions to the
   6918       post_clo_init function, since we need to first parse the command
   6919       line before deciding which set to use. */
   6920 
   6921 #  ifdef PERF_FAST_STACK
   6922    VG_(track_die_mem_stack_4)     ( mc_die_mem_stack_4   );
   6923    VG_(track_die_mem_stack_8)     ( mc_die_mem_stack_8   );
   6924    VG_(track_die_mem_stack_12)    ( mc_die_mem_stack_12  );
   6925    VG_(track_die_mem_stack_16)    ( mc_die_mem_stack_16  );
   6926    VG_(track_die_mem_stack_32)    ( mc_die_mem_stack_32  );
   6927    VG_(track_die_mem_stack_112)   ( mc_die_mem_stack_112 );
   6928    VG_(track_die_mem_stack_128)   ( mc_die_mem_stack_128 );
   6929    VG_(track_die_mem_stack_144)   ( mc_die_mem_stack_144 );
   6930    VG_(track_die_mem_stack_160)   ( mc_die_mem_stack_160 );
   6931 #  endif
   6932    VG_(track_die_mem_stack)       ( mc_die_mem_stack     );
   6933 
   6934    VG_(track_ban_mem_stack)       ( MC_(make_mem_noaccess) );
   6935 
   6936    VG_(track_pre_mem_read)        ( check_mem_is_defined );
   6937    VG_(track_pre_mem_read_asciiz) ( check_mem_is_defined_asciiz );
   6938    VG_(track_pre_mem_write)       ( check_mem_is_addressable );
   6939    VG_(track_post_mem_write)      ( mc_post_mem_write );
   6940 
   6941    VG_(track_post_reg_write)                  ( mc_post_reg_write );
   6942    VG_(track_post_reg_write_clientcall_return)( mc_post_reg_write_clientcall );
   6943 
   6944    VG_(needs_watchpoint)          ( mc_mark_unaddressable_for_watchpoint );
   6945 
   6946    init_shadow_memory();
   6947    // MC_(chunk_poolalloc) must be allocated in post_clo_init
   6948    tl_assert(MC_(chunk_poolalloc) == NULL);
   6949    MC_(malloc_list)  = VG_(HT_construct)( "MC_(malloc_list)" );
   6950    MC_(mempool_list) = VG_(HT_construct)( "MC_(mempool_list)" );
   6951    init_prof_mem();
   6952 
   6953    tl_assert( mc_expensive_sanity_check() );
   6954 
   6955    // {LOADV,STOREV}[8421] will all fail horribly if this isn't true.
   6956    tl_assert(sizeof(UWord) == sizeof(Addr));
   6957    // Call me paranoid.  I don't care.
   6958    tl_assert(sizeof(void*) == sizeof(Addr));
   6959 
   6960    // BYTES_PER_SEC_VBIT_NODE must be a power of two.
   6961    tl_assert(-1 != VG_(log2)(BYTES_PER_SEC_VBIT_NODE));
   6962 
   6963    /* This is small.  Always initialise it. */
   6964    init_nia_to_ecu_cache();
   6965 
   6966    /* We can't initialise ocacheL1/ocacheL2 yet, since we don't know
   6967       if we need to, since the command line args haven't been
   6968       processed yet.  Hence defer it to mc_post_clo_init. */
   6969    tl_assert(ocacheL1 == NULL);
   6970    tl_assert(ocacheL2 == NULL);
   6971 
   6972    /* Check some important stuff.  See extensive comments above
   6973       re UNALIGNED_OR_HIGH for background. */
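           /* In particular, MASK(sz) must be nonzero when ANDed with any
              address that is either not sz-aligned or lies above
              MAX_PRIMARY_ADDRESS; that is what diverts such accesses to the
              slow cases. */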
   6974 #  if VG_WORDSIZE == 4
   6975    tl_assert(sizeof(void*) == 4);
   6976    tl_assert(sizeof(Addr)  == 4);
   6977    tl_assert(sizeof(UWord) == 4);
   6978    tl_assert(sizeof(Word)  == 4);
   6979    tl_assert(MAX_PRIMARY_ADDRESS == 0xFFFFFFFFUL);
   6980    tl_assert(MASK(1) == 0UL);
   6981    tl_assert(MASK(2) == 1UL);
   6982    tl_assert(MASK(4) == 3UL);
   6983    tl_assert(MASK(8) == 7UL);
   6984 #  else
   6985    tl_assert(VG_WORDSIZE == 8);
   6986    tl_assert(sizeof(void*) == 8);
   6987    tl_assert(sizeof(Addr)  == 8);
   6988    tl_assert(sizeof(UWord) == 8);
   6989    tl_assert(sizeof(Word)  == 8);
   6990    tl_assert(MAX_PRIMARY_ADDRESS == 0xFFFFFFFFFULL);
   6991    tl_assert(MASK(1) == 0xFFFFFFF000000000ULL);
   6992    tl_assert(MASK(2) == 0xFFFFFFF000000001ULL);
   6993    tl_assert(MASK(4) == 0xFFFFFFF000000003ULL);
   6994    tl_assert(MASK(8) == 0xFFFFFFF000000007ULL);
   6995 #  endif
   6996 }
   6997 
   6998 VG_DETERMINE_INTERFACE_VERSION(mc_pre_clo_init)
   6999 
   7000 /*--------------------------------------------------------------------*/
   7001 /*--- end                                                mc_main.c ---*/
   7002 /*--------------------------------------------------------------------*/
   7003