      1 
      2 /*--------------------------------------------------------------------*/
      3 /*--- MemCheck: Maintain bitmaps of memory, tracking the           ---*/
      4 /*--- accessibility (A) and validity (V) status of each byte.      ---*/
      5 /*---                                                    mc_main.c ---*/
      6 /*--------------------------------------------------------------------*/
      7 
      8 /*
      9    This file is part of MemCheck, a heavyweight Valgrind tool for
     10    detecting memory errors.
     11 
     12    Copyright (C) 2000-2012 Julian Seward
      13       jseward@acm.org
     14 
     15    This program is free software; you can redistribute it and/or
     16    modify it under the terms of the GNU General Public License as
     17    published by the Free Software Foundation; either version 2 of the
     18    License, or (at your option) any later version.
     19 
     20    This program is distributed in the hope that it will be useful, but
     21    WITHOUT ANY WARRANTY; without even the implied warranty of
     22    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
     23    General Public License for more details.
     24 
     25    You should have received a copy of the GNU General Public License
     26    along with this program; if not, write to the Free Software
     27    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
     28    02111-1307, USA.
     29 
     30    The GNU General Public License is contained in the file COPYING.
     31 */
     32 
     33 #include "pub_tool_basics.h"
     34 #include "pub_tool_aspacemgr.h"
     35 #include "pub_tool_gdbserver.h"
     36 #include "pub_tool_poolalloc.h"
     37 #include "pub_tool_hashtable.h"     // For mc_include.h
     38 #include "pub_tool_libcbase.h"
     39 #include "pub_tool_libcassert.h"
     40 #include "pub_tool_libcprint.h"
     41 #include "pub_tool_machine.h"
     42 #include "pub_tool_mallocfree.h"
     43 #include "pub_tool_options.h"
     44 #include "pub_tool_oset.h"
     45 #include "pub_tool_replacemalloc.h"
     46 #include "pub_tool_tooliface.h"
     47 #include "pub_tool_threadstate.h"
     48 
     49 #include "mc_include.h"
     50 #include "memcheck.h"   /* for client requests */
     51 
     52 
     53 /* Set to 1 to do a little more sanity checking */
     54 #define VG_DEBUG_MEMORY 0
     55 
     56 #define DEBUG(fmt, args...) //VG_(printf)(fmt, ## args)
     57 
     58 static void ocache_sarp_Set_Origins ( Addr, UWord, UInt ); /* fwds */
     59 static void ocache_sarp_Clear_Origins ( Addr, UWord ); /* fwds */
     60 
     61 
     62 /*------------------------------------------------------------*/
     63 /*--- Fast-case knobs                                      ---*/
     64 /*------------------------------------------------------------*/
     65 
     66 // Comment these out to disable the fast cases (don't just set them to zero).
     67 
     68 #define PERF_FAST_LOADV    1
     69 #define PERF_FAST_STOREV   1
     70 
     71 #define PERF_FAST_SARP     1
     72 
     73 #define PERF_FAST_STACK    1
     74 #define PERF_FAST_STACK2   1
     75 
     76 /* Change this to 1 to enable assertions on origin tracking cache fast
     77    paths */
     78 #define OC_ENABLE_ASSERTIONS 0
     79 
     80 
     81 /*------------------------------------------------------------*/
     82 /*--- Comments on the origin tracking implementation       ---*/
     83 /*------------------------------------------------------------*/
     84 
     85 /* See detailed comment entitled
     86    AN OVERVIEW OF THE ORIGIN TRACKING IMPLEMENTATION
     87    which is contained further on in this file. */
     88 
     89 
     90 /*------------------------------------------------------------*/
     91 /*--- V bits and A bits                                    ---*/
     92 /*------------------------------------------------------------*/
     93 
     94 /* Conceptually, every byte value has 8 V bits, which track whether Memcheck
     95    thinks the corresponding value bit is defined.  And every memory byte
     96    has an A bit, which tracks whether Memcheck thinks the program can access
     97    it safely (ie. it's mapped, and has at least one of the RWX permission bits
     98    set).  So every N-bit register is shadowed with N V bits, and every memory
     99    byte is shadowed with 8 V bits and one A bit.
    100 
    101    In the implementation, we use two forms of compression (compressed V bits
    102    and distinguished secondary maps) to avoid the 9-bit-per-byte overhead
    103    for memory.
    104 
    105    Memcheck also tracks extra information about each heap block that is
    106    allocated, for detecting memory leaks and other purposes.
    107 */
    108 
    109 /*------------------------------------------------------------*/
    110 /*--- Basic A/V bitmap representation.                     ---*/
    111 /*------------------------------------------------------------*/
    112 
    113 /* All reads and writes are checked against a memory map (a.k.a. shadow
    114    memory), which records the state of all memory in the process.
    115 
    116    On 32-bit machines the memory map is organised as follows.
    117    The top 16 bits of an address are used to index into a top-level
    118    map table, containing 65536 entries.  Each entry is a pointer to a
     119    second-level map, which records the accessibility and validity
    120    permissions for the 65536 bytes indexed by the lower 16 bits of the
    121    address.  Each byte is represented by two bits (details are below).  So
    122    each second-level map contains 16384 bytes.  This two-level arrangement
     123    conveniently divides the 4G address space into 64k lumps, each 64k
     124    bytes in size.
    125 
    126    All entries in the primary (top-level) map must point to a valid
    127    secondary (second-level) map.  Since many of the 64kB chunks will
    128    have the same status for every bit -- ie. noaccess (for unused
    129    address space) or entirely addressable and defined (for code segments) --
    130    there are three distinguished secondary maps, which indicate 'noaccess',
    131    'undefined' and 'defined'.  For these uniform 64kB chunks, the primary
    132    map entry points to the relevant distinguished map.  In practice,
    133    typically more than half of the addressable memory is represented with
    134    the 'undefined' or 'defined' distinguished secondary map, so it gives a
    135    good saving.  It also lets us set the V+A bits of large address regions
    136    quickly in set_address_range_perms().
    137 
    138    On 64-bit machines it's more complicated.  If we followed the same basic
    139    scheme we'd have a four-level table which would require too many memory
    140    accesses.  So instead the top-level map table has 2^19 entries (indexed
    141    using bits 16..34 of the address);  this covers the bottom 32GB.  Any
    142    accesses above 32GB are handled with a slow, sparse auxiliary table.
    143    Valgrind's address space manager tries very hard to keep things below
    144    this 32GB barrier so that performance doesn't suffer too much.
    145 
    146    Note that this file has a lot of different functions for reading and
    147    writing shadow memory.  Only a couple are strictly necessary (eg.
     148    get_vabits2 and set_vabits2); most are just specialised for specific
    149    common cases to improve performance.
    150 
    151    Aside: the V+A bits are less precise than they could be -- we have no way
    152    of marking memory as read-only.  It would be great if we could add an
    153    extra state VA_BITSn_READONLY.  But then we'd have 5 different states,
    154    which requires 2.3 bits to hold, and there's no way to do that elegantly
    155    -- we'd have to double up to 4 bits of metadata per byte, which doesn't
    156    seem worth it.
    157 */
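
/* As a concrete (purely illustrative) example of the 32-bit layout: for an
   access to address 0x12345678, the top 16 bits (0x1234) select the
   primary map entry, and the low 16 bits (0x5678) locate the byte within
   the 64kB chunk covered by that secondary map.  Since each secondary map
   byte holds the V+A bits for four memory bytes, the byte's 2-bit state
   lives in vabits8[0x5678 >> 2] == vabits8[0x159E] of that secondary. */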
    158 
    159 /* --------------- Basic configuration --------------- */
    160 
    161 /* Only change this.  N_PRIMARY_MAP *must* be a power of 2. */
    162 
    163 #if VG_WORDSIZE == 4
    164 
    165 /* cover the entire address space */
    166 #  define N_PRIMARY_BITS  16
    167 
    168 #else
    169 
    170 /* Just handle the first 32G fast and the rest via auxiliary
    171    primaries.  If you change this, Memcheck will assert at startup.
    172    See the definition of UNALIGNED_OR_HIGH for extensive comments. */
    173 #  define N_PRIMARY_BITS  19
    174 
    175 #endif
    176 
    177 
    178 /* Do not change this. */
    179 #define N_PRIMARY_MAP  ( ((UWord)1) << N_PRIMARY_BITS)
    180 
    181 /* Do not change this. */
    182 #define MAX_PRIMARY_ADDRESS (Addr)((((Addr)65536) * N_PRIMARY_MAP)-1)
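
/* For reference (not used by the code): with the settings above,
   MAX_PRIMARY_ADDRESS works out to 0xFFFFFFFF on 32-bit targets
   (N_PRIMARY_BITS == 16, so the whole 4GB space is covered) and to
   0x7FFFFFFFF on 64-bit targets (N_PRIMARY_BITS == 19, ie. the first
   32GB); anything above that goes through the auxiliary primary map. */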
    183 
    184 
    185 /* --------------- Secondary maps --------------- */
    186 
    187 // Each byte of memory conceptually has an A bit, which indicates its
     188 // addressability, and 8 V bits, which indicate its definedness.
    189 //
    190 // But because very few bytes are partially defined, we can use a nice
    191 // compression scheme to reduce the size of shadow memory.  Each byte of
     192 // memory has 2 bits which indicate its state (ie. V+A bits):
    193 //
    194 //   00:  noaccess    (unaddressable but treated as fully defined)
    195 //   01:  undefined   (addressable and fully undefined)
    196 //   10:  defined     (addressable and fully defined)
    197 //   11:  partdefined (addressable and partially defined)
    198 //
    199 // In the "partdefined" case, we use a secondary table to store the V bits.
    200 // Each entry in the secondary-V-bits table maps a byte address to its 8 V
    201 // bits.
    202 //
    203 // We store the compressed V+A bits in 8-bit chunks, ie. the V+A bits for
    204 // four bytes (32 bits) of memory are in each chunk.  Hence the name
    205 // "vabits8".  This lets us get the V+A bits for four bytes at a time
    206 // easily (without having to do any shifting and/or masking), and that is a
    207 // very common operation.  (Note that although each vabits8 chunk
    208 // is 8 bits in size, it represents 32 bits of memory.)
    209 //
    210 // The representation is "inverse" little-endian... each 4 bytes of
    211 // memory is represented by a 1 byte value, where:
    212 //
    213 // - the status of byte (a+0) is held in bits [1..0]
    214 // - the status of byte (a+1) is held in bits [3..2]
    215 // - the status of byte (a+2) is held in bits [5..4]
    216 // - the status of byte (a+3) is held in bits [7..6]
    217 //
    218 // It's "inverse" because endianness normally describes a mapping from
    219 // value bits to memory addresses;  in this case the mapping is inverted.
    220 // Ie. instead of particular value bits being held in certain addresses, in
    221 // this case certain addresses are represented by particular value bits.
    222 // See insert_vabits2_into_vabits8() for an example.
    223 //
    224 // But note that we don't compress the V bits stored in registers;  they
     225 // need to be explicit to make the shadow operations possible.  Therefore
    226 // when moving values between registers and memory we need to convert
    227 // between the expanded in-register format and the compressed in-memory
     228 // format.  This isn't so difficult; it just requires careful attention in a
    229 // few places.
    230 
     231 // These represent 8 bits of memory.
    232 #define VA_BITS2_NOACCESS     0x0      // 00b
    233 #define VA_BITS2_UNDEFINED    0x1      // 01b
    234 #define VA_BITS2_DEFINED      0x2      // 10b
    235 #define VA_BITS2_PARTDEFINED  0x3      // 11b
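
/* A worked example of the packing described above (illustrative only):
   for a 4-aligned address a, if byte a+0 is defined, a+1 is undefined,
   a+2 is noaccess and a+3 is partdefined, the corresponding vabits8 is

      (VA_BITS2_PARTDEFINED << 6) | (VA_BITS2_NOACCESS << 4)
    | (VA_BITS2_UNDEFINED   << 2) |  VA_BITS2_DEFINED
   == 11_00_01_10b == 0xC6.                                              */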
    236 
    237 // These represent 16 bits of memory.
    238 #define VA_BITS4_NOACCESS     0x0      // 00_00b
    239 #define VA_BITS4_UNDEFINED    0x5      // 01_01b
    240 #define VA_BITS4_DEFINED      0xa      // 10_10b
    241 
    242 // These represent 32 bits of memory.
    243 #define VA_BITS8_NOACCESS     0x00     // 00_00_00_00b
    244 #define VA_BITS8_UNDEFINED    0x55     // 01_01_01_01b
    245 #define VA_BITS8_DEFINED      0xaa     // 10_10_10_10b
    246 
    247 // These represent 64 bits of memory.
    248 #define VA_BITS16_NOACCESS    0x0000   // 00_00_00_00b x 2
    249 #define VA_BITS16_UNDEFINED   0x5555   // 01_01_01_01b x 2
    250 #define VA_BITS16_DEFINED     0xaaaa   // 10_10_10_10b x 2
    251 
    252 
    253 #define SM_CHUNKS             16384
    254 #define SM_OFF(aaa)           (((aaa) & 0xffff) >> 2)
    255 #define SM_OFF_16(aaa)        (((aaa) & 0xffff) >> 3)
    256 
    257 // Paranoia:  it's critical for performance that the requested inlining
    258 // occurs.  So try extra hard.
    259 #define INLINE    inline __attribute__((always_inline))
    260 
    261 static INLINE Addr start_of_this_sm ( Addr a ) {
    262    return (a & (~SM_MASK));
    263 }
    264 static INLINE Bool is_start_of_sm ( Addr a ) {
    265    return (start_of_this_sm(a) == a);
    266 }
    267 
    268 typedef
    269    struct {
    270       UChar vabits8[SM_CHUNKS];
    271    }
    272    SecMap;
    273 
    274 // 3 distinguished secondary maps, one for no-access, one for
    275 // accessible but undefined, and one for accessible and defined.
    276 // Distinguished secondaries may never be modified.
    277 #define SM_DIST_NOACCESS   0
    278 #define SM_DIST_UNDEFINED  1
    279 #define SM_DIST_DEFINED    2
    280 
    281 static SecMap sm_distinguished[3];
    282 
    283 static INLINE Bool is_distinguished_sm ( SecMap* sm ) {
    284    return sm >= &sm_distinguished[0] && sm <= &sm_distinguished[2];
    285 }
    286 
    287 // Forward declaration
    288 static void update_SM_counts(SecMap* oldSM, SecMap* newSM);
    289 
    290 /* dist_sm points to one of our three distinguished secondaries.  Make
    291    a copy of it so that we can write to it.
    292 */
    293 static SecMap* copy_for_writing ( SecMap* dist_sm )
    294 {
    295    SecMap* new_sm;
    296    tl_assert(dist_sm == &sm_distinguished[0]
    297           || dist_sm == &sm_distinguished[1]
    298           || dist_sm == &sm_distinguished[2]);
    299 
    300    new_sm = VG_(am_shadow_alloc)(sizeof(SecMap));
    301    if (new_sm == NULL)
    302       VG_(out_of_memory_NORETURN)( "memcheck:allocate new SecMap",
    303                                    sizeof(SecMap) );
    304    VG_(memcpy)(new_sm, dist_sm, sizeof(SecMap));
    305    update_SM_counts(dist_sm, new_sm);
    306    return new_sm;
    307 }
    308 
    309 /* --------------- Stats --------------- */
    310 
    311 static Int   n_issued_SMs      = 0;
    312 static Int   n_deissued_SMs    = 0;
    313 static Int   n_noaccess_SMs    = N_PRIMARY_MAP; // start with many noaccess DSMs
    314 static Int   n_undefined_SMs   = 0;
    315 static Int   n_defined_SMs     = 0;
    316 static Int   n_non_DSM_SMs     = 0;
    317 static Int   max_noaccess_SMs  = 0;
    318 static Int   max_undefined_SMs = 0;
    319 static Int   max_defined_SMs   = 0;
    320 static Int   max_non_DSM_SMs   = 0;
    321 
    322 /* # searches initiated in auxmap_L1, and # base cmps required */
    323 static ULong n_auxmap_L1_searches  = 0;
    324 static ULong n_auxmap_L1_cmps      = 0;
    325 /* # of searches that missed in auxmap_L1 and therefore had to
    326    be handed to auxmap_L2. And the number of nodes inserted. */
    327 static ULong n_auxmap_L2_searches  = 0;
    328 static ULong n_auxmap_L2_nodes     = 0;
    329 
    330 static Int   n_sanity_cheap     = 0;
    331 static Int   n_sanity_expensive = 0;
    332 
    333 static Int   n_secVBit_nodes   = 0;
    334 static Int   max_secVBit_nodes = 0;
    335 
    336 static void update_SM_counts(SecMap* oldSM, SecMap* newSM)
    337 {
    338    if      (oldSM == &sm_distinguished[SM_DIST_NOACCESS ]) n_noaccess_SMs --;
    339    else if (oldSM == &sm_distinguished[SM_DIST_UNDEFINED]) n_undefined_SMs--;
    340    else if (oldSM == &sm_distinguished[SM_DIST_DEFINED  ]) n_defined_SMs  --;
    341    else                                                  { n_non_DSM_SMs  --;
    342                                                            n_deissued_SMs ++; }
    343 
    344    if      (newSM == &sm_distinguished[SM_DIST_NOACCESS ]) n_noaccess_SMs ++;
    345    else if (newSM == &sm_distinguished[SM_DIST_UNDEFINED]) n_undefined_SMs++;
    346    else if (newSM == &sm_distinguished[SM_DIST_DEFINED  ]) n_defined_SMs  ++;
    347    else                                                  { n_non_DSM_SMs  ++;
    348                                                            n_issued_SMs   ++; }
    349 
    350    if (n_noaccess_SMs  > max_noaccess_SMs ) max_noaccess_SMs  = n_noaccess_SMs;
    351    if (n_undefined_SMs > max_undefined_SMs) max_undefined_SMs = n_undefined_SMs;
    352    if (n_defined_SMs   > max_defined_SMs  ) max_defined_SMs   = n_defined_SMs;
    353    if (n_non_DSM_SMs   > max_non_DSM_SMs  ) max_non_DSM_SMs   = n_non_DSM_SMs;
    354 }
    355 
    356 /* --------------- Primary maps --------------- */
    357 
    358 /* The main primary map.  This covers some initial part of the address
    359    space, addresses 0 .. (N_PRIMARY_MAP << 16)-1.  The rest of it is
    360    handled using the auxiliary primary map.
    361 */
    362 static SecMap* primary_map[N_PRIMARY_MAP];
    363 
    364 
    365 /* An entry in the auxiliary primary map.  base must be a 64k-aligned
    366    value, and sm points at the relevant secondary map.  As with the
    367    main primary map, the secondary may be either a real secondary, or
    368    one of the three distinguished secondaries.  DO NOT CHANGE THIS
    369    LAYOUT: the first word has to be the key for OSet fast lookups.
    370 */
    371 typedef
    372    struct {
    373       Addr    base;
    374       SecMap* sm;
    375    }
    376    AuxMapEnt;
    377 
    378 /* Tunable parameter: How big is the L1 queue? */
    379 #define N_AUXMAP_L1 24
    380 
    381 /* Tunable parameter: How far along the L1 queue to insert
    382    entries resulting from L2 lookups? */
    383 #define AUXMAP_L1_INSERT_IX 12
    384 
    385 static struct {
    386           Addr       base;
    387           AuxMapEnt* ent; // pointer to the matching auxmap_L2 node
    388        }
    389        auxmap_L1[N_AUXMAP_L1];
    390 
    391 static OSet* auxmap_L2 = NULL;
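
/* Sketch of the lookup path for an address above MAX_PRIMARY_ADDRESS
   (see maybe_find_in_auxmap below): the 64k-aligned base is first looked
   up in the small auxmap_L1 array, which is kept in roughly
   most-recently-used order by swapping a hit one place towards the front;
   only on an L1 miss is the auxmap_L2 OSet consulted, and a hit there is
   then (re)inserted into auxmap_L1 at index AUXMAP_L1_INSERT_IX. */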
    392 
    393 static void init_auxmap_L1_L2 ( void )
    394 {
    395    Int i;
    396    for (i = 0; i < N_AUXMAP_L1; i++) {
    397       auxmap_L1[i].base = 0;
    398       auxmap_L1[i].ent  = NULL;
    399    }
    400 
    401    tl_assert(0 == offsetof(AuxMapEnt,base));
    402    tl_assert(sizeof(Addr) == sizeof(void*));
    403    auxmap_L2 = VG_(OSetGen_Create)( /*keyOff*/  offsetof(AuxMapEnt,base),
    404                                     /*fastCmp*/ NULL,
    405                                     VG_(malloc), "mc.iaLL.1", VG_(free) );
    406 }
    407 
    408 /* Check representation invariants; if OK return NULL; else a
    409    descriptive bit of text.  Also return the number of
    410    non-distinguished secondary maps referred to from the auxiliary
    411    primary maps. */
    412 
    413 static HChar* check_auxmap_L1_L2_sanity ( Word* n_secmaps_found )
    414 {
    415    Word i, j;
    416    /* On a 32-bit platform, the L2 and L1 tables should
    417       both remain empty forever.
    418 
    419       On a 64-bit platform:
    420       In the L2 table:
    421        all .base & 0xFFFF == 0
    422        all .base > MAX_PRIMARY_ADDRESS
    423       In the L1 table:
    424        all .base & 0xFFFF == 0
    425        all (.base > MAX_PRIMARY_ADDRESS
    426             .base & 0xFFFF == 0
    427             and .ent points to an AuxMapEnt with the same .base)
    428            or
    429            (.base == 0 and .ent == NULL)
    430    */
    431    *n_secmaps_found = 0;
    432    if (sizeof(void*) == 4) {
    433       /* 32-bit platform */
    434       if (VG_(OSetGen_Size)(auxmap_L2) != 0)
    435          return "32-bit: auxmap_L2 is non-empty";
    436       for (i = 0; i < N_AUXMAP_L1; i++)
     437          if (auxmap_L1[i].base != 0 || auxmap_L1[i].ent != NULL)
     438             return "32-bit: auxmap_L1 is non-empty";
    439    } else {
    440       /* 64-bit platform */
    441       UWord elems_seen = 0;
    442       AuxMapEnt *elem, *res;
    443       AuxMapEnt key;
    444       /* L2 table */
    445       VG_(OSetGen_ResetIter)(auxmap_L2);
    446       while ( (elem = VG_(OSetGen_Next)(auxmap_L2)) ) {
    447          elems_seen++;
    448          if (0 != (elem->base & (Addr)0xFFFF))
    449             return "64-bit: nonzero .base & 0xFFFF in auxmap_L2";
    450          if (elem->base <= MAX_PRIMARY_ADDRESS)
    451             return "64-bit: .base <= MAX_PRIMARY_ADDRESS in auxmap_L2";
    452          if (elem->sm == NULL)
    453             return "64-bit: .sm in _L2 is NULL";
    454          if (!is_distinguished_sm(elem->sm))
    455             (*n_secmaps_found)++;
    456       }
    457       if (elems_seen != n_auxmap_L2_nodes)
    458          return "64-bit: disagreement on number of elems in _L2";
    459       /* Check L1-L2 correspondence */
    460       for (i = 0; i < N_AUXMAP_L1; i++) {
    461          if (auxmap_L1[i].base == 0 && auxmap_L1[i].ent == NULL)
    462             continue;
    463          if (0 != (auxmap_L1[i].base & (Addr)0xFFFF))
    464             return "64-bit: nonzero .base & 0xFFFF in auxmap_L1";
    465          if (auxmap_L1[i].base <= MAX_PRIMARY_ADDRESS)
    466             return "64-bit: .base <= MAX_PRIMARY_ADDRESS in auxmap_L1";
    467          if (auxmap_L1[i].ent == NULL)
    468             return "64-bit: .ent is NULL in auxmap_L1";
    469          if (auxmap_L1[i].ent->base != auxmap_L1[i].base)
    470             return "64-bit: _L1 and _L2 bases are inconsistent";
    471          /* Look it up in auxmap_L2. */
    472          key.base = auxmap_L1[i].base;
    473          key.sm   = 0;
    474          res = VG_(OSetGen_Lookup)(auxmap_L2, &key);
    475          if (res == NULL)
    476             return "64-bit: _L1 .base not found in _L2";
    477          if (res != auxmap_L1[i].ent)
    478             return "64-bit: _L1 .ent disagrees with _L2 entry";
    479       }
    480       /* Check L1 contains no duplicates */
    481       for (i = 0; i < N_AUXMAP_L1; i++) {
    482          if (auxmap_L1[i].base == 0)
    483             continue;
     484          for (j = i+1; j < N_AUXMAP_L1; j++) {
    485             if (auxmap_L1[j].base == 0)
    486                continue;
    487             if (auxmap_L1[j].base == auxmap_L1[i].base)
    488                return "64-bit: duplicate _L1 .base entries";
    489          }
    490       }
    491    }
    492    return NULL; /* ok */
    493 }
    494 
    495 static void insert_into_auxmap_L1_at ( Word rank, AuxMapEnt* ent )
    496 {
    497    Word i;
    498    tl_assert(ent);
    499    tl_assert(rank >= 0 && rank < N_AUXMAP_L1);
    500    for (i = N_AUXMAP_L1-1; i > rank; i--)
    501       auxmap_L1[i] = auxmap_L1[i-1];
    502    auxmap_L1[rank].base = ent->base;
    503    auxmap_L1[rank].ent  = ent;
    504 }
    505 
    506 static INLINE AuxMapEnt* maybe_find_in_auxmap ( Addr a )
    507 {
    508    AuxMapEnt  key;
    509    AuxMapEnt* res;
    510    Word       i;
    511 
    512    tl_assert(a > MAX_PRIMARY_ADDRESS);
    513    a &= ~(Addr)0xFFFF;
    514 
    515    /* First search the front-cache, which is a self-organising
    516       list containing the most popular entries. */
    517 
    518    if (LIKELY(auxmap_L1[0].base == a))
    519       return auxmap_L1[0].ent;
    520    if (LIKELY(auxmap_L1[1].base == a)) {
    521       Addr       t_base = auxmap_L1[0].base;
    522       AuxMapEnt* t_ent  = auxmap_L1[0].ent;
    523       auxmap_L1[0].base = auxmap_L1[1].base;
    524       auxmap_L1[0].ent  = auxmap_L1[1].ent;
    525       auxmap_L1[1].base = t_base;
    526       auxmap_L1[1].ent  = t_ent;
    527       return auxmap_L1[0].ent;
    528    }
    529 
    530    n_auxmap_L1_searches++;
    531 
    532    for (i = 0; i < N_AUXMAP_L1; i++) {
    533       if (auxmap_L1[i].base == a) {
    534          break;
    535       }
    536    }
    537    tl_assert(i >= 0 && i <= N_AUXMAP_L1);
    538 
    539    n_auxmap_L1_cmps += (ULong)(i+1);
    540 
    541    if (i < N_AUXMAP_L1) {
    542       if (i > 0) {
    543          Addr       t_base = auxmap_L1[i-1].base;
    544          AuxMapEnt* t_ent  = auxmap_L1[i-1].ent;
    545          auxmap_L1[i-1].base = auxmap_L1[i-0].base;
    546          auxmap_L1[i-1].ent  = auxmap_L1[i-0].ent;
    547          auxmap_L1[i-0].base = t_base;
    548          auxmap_L1[i-0].ent  = t_ent;
    549          i--;
    550       }
    551       return auxmap_L1[i].ent;
    552    }
    553 
    554    n_auxmap_L2_searches++;
    555 
    556    /* First see if we already have it. */
    557    key.base = a;
    558    key.sm   = 0;
    559 
    560    res = VG_(OSetGen_Lookup)(auxmap_L2, &key);
    561    if (res)
    562       insert_into_auxmap_L1_at( AUXMAP_L1_INSERT_IX, res );
    563    return res;
    564 }
    565 
    566 static AuxMapEnt* find_or_alloc_in_auxmap ( Addr a )
    567 {
    568    AuxMapEnt *nyu, *res;
    569 
    570    /* First see if we already have it. */
    571    res = maybe_find_in_auxmap( a );
    572    if (LIKELY(res))
    573       return res;
    574 
     575    /* Ok, there's no entry in the auxiliary primary map, so we'll have
    576       to allocate one. */
    577    a &= ~(Addr)0xFFFF;
    578 
    579    nyu = (AuxMapEnt*) VG_(OSetGen_AllocNode)( auxmap_L2, sizeof(AuxMapEnt) );
    580    tl_assert(nyu);
    581    nyu->base = a;
    582    nyu->sm   = &sm_distinguished[SM_DIST_NOACCESS];
    583    VG_(OSetGen_Insert)( auxmap_L2, nyu );
    584    insert_into_auxmap_L1_at( AUXMAP_L1_INSERT_IX, nyu );
    585    n_auxmap_L2_nodes++;
    586    return nyu;
    587 }
    588 
    589 /* --------------- SecMap fundamentals --------------- */
    590 
    591 // In all these, 'low' means it's definitely in the main primary map,
    592 // 'high' means it's definitely in the auxiliary table.
    593 
    594 static INLINE SecMap** get_secmap_low_ptr ( Addr a )
    595 {
    596    UWord pm_off = a >> 16;
    597 #  if VG_DEBUG_MEMORY >= 1
    598    tl_assert(pm_off < N_PRIMARY_MAP);
    599 #  endif
    600    return &primary_map[ pm_off ];
    601 }
    602 
    603 static INLINE SecMap** get_secmap_high_ptr ( Addr a )
    604 {
    605    AuxMapEnt* am = find_or_alloc_in_auxmap(a);
    606    return &am->sm;
    607 }
    608 
    609 static SecMap** get_secmap_ptr ( Addr a )
    610 {
    611    return ( a <= MAX_PRIMARY_ADDRESS
    612           ? get_secmap_low_ptr(a)
    613           : get_secmap_high_ptr(a));
    614 }
    615 
    616 static INLINE SecMap* get_secmap_for_reading_low ( Addr a )
    617 {
    618    return *get_secmap_low_ptr(a);
    619 }
    620 
    621 static INLINE SecMap* get_secmap_for_reading_high ( Addr a )
    622 {
    623    return *get_secmap_high_ptr(a);
    624 }
    625 
    626 static INLINE SecMap* get_secmap_for_writing_low(Addr a)
    627 {
    628    SecMap** p = get_secmap_low_ptr(a);
    629    if (UNLIKELY(is_distinguished_sm(*p)))
    630       *p = copy_for_writing(*p);
    631    return *p;
    632 }
    633 
    634 static INLINE SecMap* get_secmap_for_writing_high ( Addr a )
    635 {
    636    SecMap** p = get_secmap_high_ptr(a);
    637    if (UNLIKELY(is_distinguished_sm(*p)))
    638       *p = copy_for_writing(*p);
    639    return *p;
    640 }
    641 
    642 /* Produce the secmap for 'a', either from the primary map or by
    643    ensuring there is an entry for it in the aux primary map.  The
    644    secmap may be a distinguished one as the caller will only want to
    645    be able to read it.
    646 */
    647 static INLINE SecMap* get_secmap_for_reading ( Addr a )
    648 {
    649    return ( a <= MAX_PRIMARY_ADDRESS
    650           ? get_secmap_for_reading_low (a)
    651           : get_secmap_for_reading_high(a) );
    652 }
    653 
    654 /* Produce the secmap for 'a', either from the primary map or by
    655    ensuring there is an entry for it in the aux primary map.  The
    656    secmap may not be a distinguished one, since the caller will want
    657    to be able to write it.  If it is a distinguished secondary, make a
    658    writable copy of it, install it, and return the copy instead.  (COW
    659    semantics).
    660 */
    661 static SecMap* get_secmap_for_writing ( Addr a )
    662 {
    663    return ( a <= MAX_PRIMARY_ADDRESS
    664           ? get_secmap_for_writing_low (a)
    665           : get_secmap_for_writing_high(a) );
    666 }
    667 
    668 /* If 'a' has a SecMap, produce it.  Else produce NULL.  But don't
    669    allocate one if one doesn't already exist.  This is used by the
    670    leak checker.
    671 */
    672 static SecMap* maybe_get_secmap_for ( Addr a )
    673 {
    674    if (a <= MAX_PRIMARY_ADDRESS) {
    675       return get_secmap_for_reading_low(a);
    676    } else {
    677       AuxMapEnt* am = maybe_find_in_auxmap(a);
    678       return am ? am->sm : NULL;
    679    }
    680 }
    681 
    682 /* --------------- Fundamental functions --------------- */
    683 
    684 static INLINE
    685 void insert_vabits2_into_vabits8 ( Addr a, UChar vabits2, UChar* vabits8 )
    686 {
    687    UInt shift =  (a & 3)  << 1;        // shift by 0, 2, 4, or 6
    688    *vabits8  &= ~(0x3     << shift);   // mask out the two old bits
    689    *vabits8  |=  (vabits2 << shift);   // mask  in the two new bits
    690 }
    691 
    692 static INLINE
    693 void insert_vabits4_into_vabits8 ( Addr a, UChar vabits4, UChar* vabits8 )
    694 {
    695    UInt shift;
    696    tl_assert(VG_IS_2_ALIGNED(a));      // Must be 2-aligned
    697    shift     =  (a & 2)   << 1;        // shift by 0 or 4
    698    *vabits8 &= ~(0xf      << shift);   // mask out the four old bits
    699    *vabits8 |=  (vabits4 << shift);    // mask  in the four new bits
    700 }
    701 
    702 static INLINE
    703 UChar extract_vabits2_from_vabits8 ( Addr a, UChar vabits8 )
    704 {
    705    UInt shift = (a & 3) << 1;          // shift by 0, 2, 4, or 6
    706    vabits8 >>= shift;                  // shift the two bits to the bottom
    707    return 0x3 & vabits8;               // mask out the rest
    708 }
    709 
    710 static INLINE
    711 UChar extract_vabits4_from_vabits8 ( Addr a, UChar vabits8 )
    712 {
    713    UInt shift;
    714    tl_assert(VG_IS_2_ALIGNED(a));      // Must be 2-aligned
    715    shift = (a & 2) << 1;               // shift by 0 or 4
    716    vabits8 >>= shift;                  // shift the four bits to the bottom
    717    return 0xf & vabits8;               // mask out the rest
    718 }
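
/* For instance (illustrative only): for a 4-aligned address a, starting
   from vabits8 == VA_BITS8_DEFINED (0xaa),
   insert_vabits2_into_vabits8(a+1, VA_BITS2_UNDEFINED, &vabits8) clears
   bits [3..2] and sets them to 01b, giving 0xa6; afterwards
   extract_vabits2_from_vabits8(a+1, 0xa6) returns VA_BITS2_UNDEFINED,
   while the other three byte positions still read back as
   VA_BITS2_DEFINED. */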
    719 
    720 // Note that these four are only used in slow cases.  The fast cases do
    721 // clever things like combine the auxmap check (in
    722 // get_secmap_{read,writ}able) with alignment checks.
    723 
    724 // *** WARNING! ***
    725 // Any time this function is called, if it is possible that vabits2
    726 // is equal to VA_BITS2_PARTDEFINED, then the corresponding entry in the
    727 // sec-V-bits table must also be set!
    728 static INLINE
    729 void set_vabits2 ( Addr a, UChar vabits2 )
    730 {
    731    SecMap* sm       = get_secmap_for_writing(a);
    732    UWord   sm_off   = SM_OFF(a);
    733    insert_vabits2_into_vabits8( a, vabits2, &(sm->vabits8[sm_off]) );
    734 }
    735 
    736 static INLINE
    737 UChar get_vabits2 ( Addr a )
    738 {
    739    SecMap* sm       = get_secmap_for_reading(a);
    740    UWord   sm_off   = SM_OFF(a);
    741    UChar   vabits8  = sm->vabits8[sm_off];
    742    return extract_vabits2_from_vabits8(a, vabits8);
    743 }
    744 
    745 // *** WARNING! ***
    746 // Any time this function is called, if it is possible that any of the
    747 // 4 2-bit fields in vabits8 are equal to VA_BITS2_PARTDEFINED, then the
    748 // corresponding entry(s) in the sec-V-bits table must also be set!
    749 static INLINE
    750 UChar get_vabits8_for_aligned_word32 ( Addr a )
    751 {
    752    SecMap* sm       = get_secmap_for_reading(a);
    753    UWord   sm_off   = SM_OFF(a);
    754    UChar   vabits8  = sm->vabits8[sm_off];
    755    return vabits8;
    756 }
    757 
    758 static INLINE
    759 void set_vabits8_for_aligned_word32 ( Addr a, UChar vabits8 )
    760 {
    761    SecMap* sm       = get_secmap_for_writing(a);
    762    UWord   sm_off   = SM_OFF(a);
    763    sm->vabits8[sm_off] = vabits8;
    764 }
    765 
    766 
    767 // Forward declarations
    768 static UWord get_sec_vbits8(Addr a);
    769 static void  set_sec_vbits8(Addr a, UWord vbits8);
    770 
    771 // Returns False if there was an addressability error.
    772 static INLINE
    773 Bool set_vbits8 ( Addr a, UChar vbits8 )
    774 {
    775    Bool  ok      = True;
    776    UChar vabits2 = get_vabits2(a);
    777    if ( VA_BITS2_NOACCESS != vabits2 ) {
    778       // Addressable.  Convert in-register format to in-memory format.
    779       // Also remove any existing sec V bit entry for the byte if no
    780       // longer necessary.
    781       if      ( V_BITS8_DEFINED   == vbits8 ) { vabits2 = VA_BITS2_DEFINED;   }
    782       else if ( V_BITS8_UNDEFINED == vbits8 ) { vabits2 = VA_BITS2_UNDEFINED; }
    783       else                                    { vabits2 = VA_BITS2_PARTDEFINED;
    784                                                 set_sec_vbits8(a, vbits8);  }
    785       set_vabits2(a, vabits2);
    786 
    787    } else {
    788       // Unaddressable!  Do nothing -- when writing to unaddressable
    789       // memory it acts as a black hole, and the V bits can never be seen
    790       // again.  So we don't have to write them at all.
    791       ok = False;
    792    }
    793    return ok;
    794 }
    795 
     796 // Returns False if there was an addressability error.  In that case, we
     797 // return fully defined V bits in *vbits8.
    798 static INLINE
    799 Bool get_vbits8 ( Addr a, UChar* vbits8 )
    800 {
    801    Bool  ok      = True;
    802    UChar vabits2 = get_vabits2(a);
    803 
    804    // Convert the in-memory format to in-register format.
    805    if      ( VA_BITS2_DEFINED   == vabits2 ) { *vbits8 = V_BITS8_DEFINED;   }
    806    else if ( VA_BITS2_UNDEFINED == vabits2 ) { *vbits8 = V_BITS8_UNDEFINED; }
    807    else if ( VA_BITS2_NOACCESS  == vabits2 ) {
    808       *vbits8 = V_BITS8_DEFINED;    // Make V bits defined!
    809       ok = False;
    810    } else {
    811       tl_assert( VA_BITS2_PARTDEFINED == vabits2 );
    812       *vbits8 = get_sec_vbits8(a);
    813    }
    814    return ok;
    815 }
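
/* An illustrative walk-through of the pair above: set_vbits8(a, 0x0f)
   on an addressable byte sees a value that is neither V_BITS8_DEFINED nor
   V_BITS8_UNDEFINED, so it marks the byte VA_BITS2_PARTDEFINED and stores
   0x0f in the sec-V-bits table; a later get_vbits8(a, &v) then returns
   True with v == 0x0f.  If the byte were noaccess, set_vbits8 would write
   nothing and return False. */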
    816 
    817 
    818 /* --------------- Secondary V bit table ------------ */
    819 
    820 // This table holds the full V bit pattern for partially-defined bytes
    821 // (PDBs) that are represented by VA_BITS2_PARTDEFINED in the main shadow
    822 // memory.
    823 //
    824 // Note: the nodes in this table can become stale.  Eg. if you write a PDB,
    825 // then overwrite the same address with a fully defined byte, the sec-V-bit
    826 // node will not necessarily be removed.  This is because checking for
    827 // whether removal is necessary would slow down the fast paths.
    828 //
    829 // To avoid the stale nodes building up too much, we periodically (once the
    830 // table reaches a certain size) garbage collect (GC) the table by
     831 // traversing it and evicting any nodes that no longer hold any PDBs.
    832 // If more than a certain proportion of nodes survived, we increase the
    833 // table size so that GCs occur less often.
    834 //
    835 // This policy is designed to avoid bad table bloat in the worst case where
    836 // a program creates huge numbers of stale PDBs -- we would get this bloat
    837 // if we had no GC -- while handling well the case where a node becomes
    838 // stale but shortly afterwards is rewritten with a PDB and so becomes
    839 // non-stale again (which happens quite often, eg. in perf/bz2).  If we just
    840 // remove all stale nodes as soon as possible, we just end up re-adding a
     841 // lot of them again later.  The "sufficiently stale" approach avoids
    842 // this.  (If a program has many live PDBs, performance will just suck,
    843 // there's no way around that.)
    844 //
    845 // Further comments, JRS 14 Feb 2012.  It turns out that the policy of
    846 // holding on to stale entries for 2 GCs before discarding them can lead
    847 // to massive space leaks.  So we're changing to an arrangement where
    848 // lines are evicted as soon as they are observed to be stale during a
    849 // GC.  This also has a side benefit of allowing the sufficiently_stale
    850 // field to be removed from the SecVBitNode struct, reducing its size by
    851 // 8 bytes, which is a substantial space saving considering that the
    852 // struct was previously 32 or so bytes, on a 64 bit target.
    853 //
    854 // In order to try and mitigate the problem that the "sufficiently stale"
    855 // heuristic was designed to avoid, the table size is allowed to drift
    856 // up ("DRIFTUP") slowly to 80000, even if the residency is low.  This
    857 // means that nodes will exist in the table longer on average, and hopefully
    858 // will be deleted and re-added less frequently.
    859 //
    860 // The previous scaling up mechanism (now called STEPUP) is retained:
    861 // if residency exceeds 50%, the table is scaled up, although by a
    862 // factor sqrt(2) rather than 2 as before.  This effectively doubles the
     863 // frequency of GCs when there are many PDBs and reduces the tendency of
    864 // stale PDBs to reside for long periods in the table.
    865 
    866 static OSet* secVBitTable;
    867 
    868 // Stats
    869 static ULong sec_vbits_new_nodes = 0;
    870 static ULong sec_vbits_updates   = 0;
    871 
    872 // This must be a power of two;  this is checked in mc_pre_clo_init().
    873 // The size chosen here is a trade-off:  if the nodes are bigger (ie. cover
    874 // a larger address range) they take more space but we can get multiple
    875 // partially-defined bytes in one if they are close to each other, reducing
    876 // the number of total nodes.  In practice sometimes they are clustered (eg.
    877 // perf/bz2 repeatedly writes then reads more than 20,000 in a contiguous
    878 // row), but often not.  So we choose something intermediate.
    879 #define BYTES_PER_SEC_VBIT_NODE     16
    880 
    881 // We make the table bigger by a factor of STEPUP_GROWTH_FACTOR if
    882 // more than this many nodes survive a GC.
    883 #define STEPUP_SURVIVOR_PROPORTION  0.5
    884 #define STEPUP_GROWTH_FACTOR        1.414213562
    885 
    886 // If the above heuristic doesn't apply, then we may make the table
    887 // slightly bigger, by a factor of DRIFTUP_GROWTH_FACTOR, if more than
    888 // this many nodes survive a GC, _and_ the total table size does
    889 // not exceed a fixed limit.  The numbers are somewhat arbitrary, but
    890 // work tolerably well on long Firefox runs.  The scaleup ratio of 1.5%
     891 // gradually but effectively reduces residency and increases the time
    892 // between GCs for programs with small numbers of PDBs.  The 80000 limit
    893 // effectively limits the table size to around 2MB for programs with
    894 // small numbers of PDBs, whilst giving a reasonably long lifetime to
    895 // entries, to try and reduce the costs resulting from deleting and
    896 // re-adding of entries.
    897 #define DRIFTUP_SURVIVOR_PROPORTION 0.15
    898 #define DRIFTUP_GROWTH_FACTOR       1.015
    899 #define DRIFTUP_MAX_SIZE            80000
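
/* Worked example of the two growth rules (numbers are illustrative):
   with secVBitLimit (below) at its initial value of 1000, a GC from which
   more than 500 nodes survive steps the limit up to
   (Int)(1000 * 1.414213562) == 1414; otherwise, if more than 150 nodes
   survive and the limit is still below 80000, it drifts up to
   (Int)(1000 * 1.015) == 1015.  See gcSecVBitTable(). */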
    900 
    901 // We GC the table when it gets this many nodes in it, ie. it's effectively
    902 // the table size.  It can change.
    903 static Int  secVBitLimit = 1000;
    904 
    905 // The number of GCs done, used to age sec-V-bit nodes for eviction.
    906 // Because it's unsigned, wrapping doesn't matter -- the right answer will
    907 // come out anyway.
    908 static UInt GCs_done = 0;
    909 
    910 typedef
    911    struct {
    912       Addr  a;
    913       UChar vbits8[BYTES_PER_SEC_VBIT_NODE];
    914    }
    915    SecVBitNode;
    916 
    917 static OSet* createSecVBitTable(void)
    918 {
    919    OSet* newSecVBitTable;
    920    newSecVBitTable = VG_(OSetGen_Create_With_Pool)
    921       ( offsetof(SecVBitNode, a),
    922         NULL, // use fast comparisons
    923         VG_(malloc), "mc.cSVT.1 (sec VBit table)",
    924         VG_(free),
    925         1000,
    926         sizeof(SecVBitNode));
    927    return newSecVBitTable;
    928 }
    929 
    930 static void gcSecVBitTable(void)
    931 {
    932    OSet*        secVBitTable2;
    933    SecVBitNode* n;
    934    Int          i, n_nodes = 0, n_survivors = 0;
    935 
    936    GCs_done++;
    937 
    938    // Create the new table.
    939    secVBitTable2 = createSecVBitTable();
    940 
    941    // Traverse the table, moving fresh nodes into the new table.
    942    VG_(OSetGen_ResetIter)(secVBitTable);
    943    while ( (n = VG_(OSetGen_Next)(secVBitTable)) ) {
    944       // Keep node if any of its bytes are non-stale.  Using
    945       // get_vabits2() for the lookup is not very efficient, but I don't
    946       // think it matters.
    947       for (i = 0; i < BYTES_PER_SEC_VBIT_NODE; i++) {
    948          if (VA_BITS2_PARTDEFINED == get_vabits2(n->a + i)) {
    949             // Found a non-stale byte, so keep =>
    950             // Insert a copy of the node into the new table.
    951             SecVBitNode* n2 =
    952                VG_(OSetGen_AllocNode)(secVBitTable2, sizeof(SecVBitNode));
    953             *n2 = *n;
    954             VG_(OSetGen_Insert)(secVBitTable2, n2);
    955             break;
    956          }
    957       }
    958    }
    959 
    960    // Get the before and after sizes.
    961    n_nodes     = VG_(OSetGen_Size)(secVBitTable);
    962    n_survivors = VG_(OSetGen_Size)(secVBitTable2);
    963 
    964    // Destroy the old table, and put the new one in its place.
    965    VG_(OSetGen_Destroy)(secVBitTable);
    966    secVBitTable = secVBitTable2;
    967 
    968    if (VG_(clo_verbosity) > 1) {
    969       Char percbuf[7];
    970       VG_(percentify)(n_survivors, n_nodes, 1, 6, percbuf);
    971       VG_(message)(Vg_DebugMsg, "memcheck GC: %d nodes, %d survivors (%s)\n",
    972                    n_nodes, n_survivors, percbuf);
    973    }
    974 
    975    // Increase table size if necessary.
    976    if ((Double)n_survivors
    977        > ((Double)secVBitLimit * STEPUP_SURVIVOR_PROPORTION)) {
    978       secVBitLimit = (Int)((Double)secVBitLimit * (Double)STEPUP_GROWTH_FACTOR);
    979       if (VG_(clo_verbosity) > 1)
    980          VG_(message)(Vg_DebugMsg,
    981                       "memcheck GC: %d new table size (stepup)\n",
    982                       secVBitLimit);
    983    }
    984    else
    985    if (secVBitLimit < DRIFTUP_MAX_SIZE
    986        && (Double)n_survivors
    987           > ((Double)secVBitLimit * DRIFTUP_SURVIVOR_PROPORTION)) {
    988       secVBitLimit = (Int)((Double)secVBitLimit * (Double)DRIFTUP_GROWTH_FACTOR);
    989       if (VG_(clo_verbosity) > 1)
    990          VG_(message)(Vg_DebugMsg,
    991                       "memcheck GC: %d new table size (driftup)\n",
    992                       secVBitLimit);
    993    }
    994 }
    995 
    996 static UWord get_sec_vbits8(Addr a)
    997 {
    998    Addr         aAligned = VG_ROUNDDN(a, BYTES_PER_SEC_VBIT_NODE);
    999    Int          amod     = a % BYTES_PER_SEC_VBIT_NODE;
   1000    SecVBitNode* n        = VG_(OSetGen_Lookup)(secVBitTable, &aAligned);
   1001    UChar        vbits8;
   1002    tl_assert2(n, "get_sec_vbits8: no node for address %p (%p)\n", aAligned, a);
   1003    // Shouldn't be fully defined or fully undefined -- those cases shouldn't
   1004    // make it to the secondary V bits table.
   1005    vbits8 = n->vbits8[amod];
   1006    tl_assert(V_BITS8_DEFINED != vbits8 && V_BITS8_UNDEFINED != vbits8);
   1007    return vbits8;
   1008 }
   1009 
   1010 static void set_sec_vbits8(Addr a, UWord vbits8)
   1011 {
   1012    Addr         aAligned = VG_ROUNDDN(a, BYTES_PER_SEC_VBIT_NODE);
   1013    Int          i, amod  = a % BYTES_PER_SEC_VBIT_NODE;
   1014    SecVBitNode* n        = VG_(OSetGen_Lookup)(secVBitTable, &aAligned);
   1015    // Shouldn't be fully defined or fully undefined -- those cases shouldn't
   1016    // make it to the secondary V bits table.
   1017    tl_assert(V_BITS8_DEFINED != vbits8 && V_BITS8_UNDEFINED != vbits8);
   1018    if (n) {
   1019       n->vbits8[amod] = vbits8;     // update
   1020       sec_vbits_updates++;
   1021    } else {
   1022       // Do a table GC if necessary.  Nb: do this before creating and
   1023       // inserting the new node, to avoid erroneously GC'ing the new node.
   1024       if (secVBitLimit == VG_(OSetGen_Size)(secVBitTable)) {
   1025          gcSecVBitTable();
   1026       }
   1027 
   1028       // New node:  assign the specific byte, make the rest invalid (they
   1029       // should never be read as-is, but be cautious).
   1030       n = VG_(OSetGen_AllocNode)(secVBitTable, sizeof(SecVBitNode));
   1031       n->a            = aAligned;
   1032       for (i = 0; i < BYTES_PER_SEC_VBIT_NODE; i++) {
   1033          n->vbits8[i] = V_BITS8_UNDEFINED;
   1034       }
   1035       n->vbits8[amod] = vbits8;
   1036 
   1037       // Insert the new node.
   1038       VG_(OSetGen_Insert)(secVBitTable, n);
   1039       sec_vbits_new_nodes++;
   1040 
   1041       n_secVBit_nodes = VG_(OSetGen_Size)(secVBitTable);
   1042       if (n_secVBit_nodes > max_secVBit_nodes)
   1043          max_secVBit_nodes = n_secVBit_nodes;
   1044    }
   1045 }
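
/* Example of the node-aligned addressing used above (illustrative
   address): with BYTES_PER_SEC_VBIT_NODE == 16, an access to 0x804b013
   uses the node whose key is VG_ROUNDDN(0x804b013, 16) == 0x804b010, and
   that byte's V bits live in the node's vbits8[0x804b013 % 16], ie.
   vbits8[3]. */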
   1046 
   1047 /* --------------- Endianness helpers --------------- */
   1048 
    1049 /* Returns the offset in memory of the byteno-th least significant byte
   1050    in a wordszB-sized word, given the specified endianness. */
   1051 static INLINE UWord byte_offset_w ( UWord wordszB, Bool bigendian,
   1052                                     UWord byteno ) {
   1053    return bigendian ? (wordszB-1-byteno) : byteno;
   1054 }
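
/* For example, for a 4-byte word: byte_offset_w(4, False, 0) == 0
   (little-endian: the least significant byte is at the lowest address),
   whereas byte_offset_w(4, True, 0) == 3 (big-endian: it is at the
   highest address). */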
   1055 
   1056 
   1057 /* --------------- Ignored address ranges --------------- */
   1058 
   1059 #define M_IGNORE_RANGES 4
   1060 
   1061 typedef
   1062    struct {
   1063       Int  used;
   1064       Addr start[M_IGNORE_RANGES];
   1065       Addr end[M_IGNORE_RANGES];
   1066    }
   1067    IgnoreRanges;
   1068 
   1069 static IgnoreRanges ignoreRanges;
   1070 
   1071 INLINE Bool MC_(in_ignored_range) ( Addr a )
   1072 {
   1073    Int i;
   1074    if (LIKELY(ignoreRanges.used == 0))
   1075       return False;
   1076    for (i = 0; i < ignoreRanges.used; i++) {
   1077       if (a >= ignoreRanges.start[i] && a < ignoreRanges.end[i])
   1078          return True;
   1079    }
   1080    return False;
   1081 }
   1082 
   1083 /* Parse two Addr separated by a dash, or fail. */
   1084 
   1085 static Bool parse_range ( UChar** ppc, Addr* result1, Addr* result2 )
   1086 {
   1087    Bool ok = VG_(parse_Addr) (ppc, result1);
   1088    if (!ok)
   1089       return False;
   1090    if (**ppc != '-')
   1091       return False;
   1092    (*ppc)++;
   1093    ok = VG_(parse_Addr) (ppc, result2);
   1094    if (!ok)
   1095       return False;
   1096    return True;
   1097 }
   1098 
   1099 /* Parse a set of ranges separated by commas into 'ignoreRanges', or
   1100    fail. */
   1101 
   1102 static Bool parse_ignore_ranges ( UChar* str0 )
   1103 {
   1104    Addr start, end;
   1105    Bool ok;
   1106    UChar*  str = str0;
   1107    UChar** ppc = &str;
   1108    ignoreRanges.used = 0;
   1109    while (1) {
   1110       ok = parse_range(ppc, &start, &end);
   1111       if (!ok)
   1112          return False;
   1113       if (ignoreRanges.used >= M_IGNORE_RANGES)
   1114          return False;
   1115       ignoreRanges.start[ignoreRanges.used] = start;
   1116       ignoreRanges.end[ignoreRanges.used] = end;
   1117       ignoreRanges.used++;
   1118       if (**ppc == 0)
   1119          return True;
   1120       if (**ppc != ',')
   1121          return False;
   1122       (*ppc)++;
   1123    }
   1124    /*NOTREACHED*/
   1125    return False;
   1126 }
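
/* For example (hypothetical values), parse_ignore_ranges on the string
   "0x4000000-0x4100000,0x7000000-0x7000400" -- the format accepted by
   Memcheck's --ignore-ranges option -- leaves ignoreRanges.used == 2 with
   those two start/end pairs; a string with a missing '-', a trailing
   comma, or more than M_IGNORE_RANGES ranges makes it return False. */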
   1127 
   1128 
   1129 /* --------------- Load/store slow cases. --------------- */
   1130 
   1131 static
   1132 __attribute__((noinline))
   1133 ULong mc_LOADVn_slow ( Addr a, SizeT nBits, Bool bigendian )
   1134 {
   1135    PROF_EVENT(30, "mc_LOADVn_slow");
   1136 
   1137    /* ------------ BEGIN semi-fast cases ------------ */
   1138    /* These deal quickly-ish with the common auxiliary primary map
    1139       cases on 64-bit platforms.  They are merely a speedup hack and can be
    1140       omitted without loss of correctness/functionality.  Note that in
    1141       both cases the "sizeof(void*) == 8" test causes these cases to be
   1142       folded out by compilers on 32-bit platforms.  These are derived
   1143       from LOADV64 and LOADV32.
   1144    */
   1145    if (LIKELY(sizeof(void*) == 8
   1146                       && nBits == 64 && VG_IS_8_ALIGNED(a))) {
   1147       SecMap* sm       = get_secmap_for_reading(a);
   1148       UWord   sm_off16 = SM_OFF_16(a);
   1149       UWord   vabits16 = ((UShort*)(sm->vabits8))[sm_off16];
   1150       if (LIKELY(vabits16 == VA_BITS16_DEFINED))
   1151          return V_BITS64_DEFINED;
   1152       if (LIKELY(vabits16 == VA_BITS16_UNDEFINED))
   1153          return V_BITS64_UNDEFINED;
   1154       /* else fall into the slow case */
   1155    }
   1156    if (LIKELY(sizeof(void*) == 8
   1157                       && nBits == 32 && VG_IS_4_ALIGNED(a))) {
   1158       SecMap* sm = get_secmap_for_reading(a);
   1159       UWord sm_off = SM_OFF(a);
   1160       UWord vabits8 = sm->vabits8[sm_off];
   1161       if (LIKELY(vabits8 == VA_BITS8_DEFINED))
   1162          return ((UWord)0xFFFFFFFF00000000ULL | (UWord)V_BITS32_DEFINED);
   1163       if (LIKELY(vabits8 == VA_BITS8_UNDEFINED))
   1164          return ((UWord)0xFFFFFFFF00000000ULL | (UWord)V_BITS32_UNDEFINED);
   1165       /* else fall into slow case */
   1166    }
   1167    /* ------------ END semi-fast cases ------------ */
   1168 
   1169    ULong  vbits64     = V_BITS64_UNDEFINED; /* result */
   1170    ULong  pessim64    = V_BITS64_DEFINED;   /* only used when p-l-ok=yes */
   1171    SSizeT szB         = nBits / 8;
   1172    SSizeT i;          /* Must be signed. */
   1173    SizeT  n_addrs_bad = 0;
   1174    Addr   ai;
   1175    UChar  vbits8;
   1176    Bool   ok;
   1177 
   1178    tl_assert(nBits == 64 || nBits == 32 || nBits == 16 || nBits == 8);
   1179 
   1180    /* Make up a 64-bit result V word, which contains the loaded data
   1181       for valid addresses and Defined for invalid addresses.  Iterate
   1182       over the bytes in the word, from the most significant down to
   1183       the least.  The vbits to return are calculated into vbits64.
   1184       Also compute the pessimising value to be used when
   1185       --partial-loads-ok=yes.  n_addrs_bad is redundant (the relevant
   1186       info can be gleaned from pessim64) but is used as a
   1187       cross-check. */
   1188    for (i = szB-1; i >= 0; i--) {
   1189       PROF_EVENT(31, "mc_LOADVn_slow(loop)");
   1190       ai = a + byte_offset_w(szB, bigendian, i);
   1191       ok = get_vbits8(ai, &vbits8);
   1192       vbits64 <<= 8;
   1193       vbits64 |= vbits8;
   1194       if (!ok) n_addrs_bad++;
   1195       pessim64 <<= 8;
   1196       pessim64 |= (ok ? V_BITS8_DEFINED : V_BITS8_UNDEFINED);
   1197    }
   1198 
   1199    /* In the common case, all the addresses involved are valid, so we
   1200       just return the computed V bits and have done. */
   1201    if (LIKELY(n_addrs_bad == 0))
   1202       return vbits64;
   1203 
   1204    /* If there's no possibility of getting a partial-loads-ok
   1205       exemption, report the error and quit. */
   1206    if (!MC_(clo_partial_loads_ok)) {
   1207       MC_(record_address_error)( VG_(get_running_tid)(), a, szB, False );
   1208       return vbits64;
   1209    }
   1210 
    1211    /* The partial-loads-ok exemption might apply.  Find out if it
   1212       does.  If so, don't report an addressing error, but do return
   1213       Undefined for the bytes that are out of range, so as to avoid
   1214       false negatives.  If it doesn't apply, just report an addressing
   1215       error in the usual way. */
   1216 
   1217    /* Some code steps along byte strings in aligned word-sized chunks
   1218       even when there is only a partially defined word at the end (eg,
   1219       optimised strlen).  This is allowed by the memory model of
   1220       modern machines, since an aligned load cannot span two pages and
    1221       thus cannot "partially fault", even though such behaviour is
    1222       declared undefined by ANSI C/C++.
   1223 
    1224       Therefore, a load from a partially-addressable place is allowed
   1225       if all of the following hold:
   1226       - the command-line flag is set [by default, it isn't]
   1227       - it's a word-sized, word-aligned load
   1228       - at least one of the addresses in the word *is* valid
   1229 
   1230       Since this suppresses the addressing error, we avoid false
   1231       negatives by marking bytes undefined when they come from an
   1232       invalid address.
   1233    */
   1234 
   1235    /* "at least one of the addresses is invalid" */
   1236    tl_assert(pessim64 != V_BITS64_DEFINED);
   1237 
   1238    if (szB == VG_WORDSIZE && VG_IS_WORD_ALIGNED(a)
   1239        && n_addrs_bad < VG_WORDSIZE) {
   1240       /* Exemption applies.  Use the previously computed pessimising
   1241          value for vbits64 and return the combined result, but don't
   1242          flag an addressing error.  The pessimising value is Defined
   1243          for valid addresses and Undefined for invalid addresses. */
    1244       /* relies on the assumption that bitwise OR implements UifU */
   1245       tl_assert(V_BIT_UNDEFINED == 1 && V_BIT_DEFINED == 0);
   1246       /* (really need "UifU" here...)
   1247          vbits64 UifU= pessim64  (is pessimised by it, iow) */
   1248       vbits64 |= pessim64;
   1249       return vbits64;
   1250    }
   1251 
   1252    /* Exemption doesn't apply.  Flag an addressing error in the normal
   1253       way. */
   1254    MC_(record_address_error)( VG_(get_running_tid)(), a, szB, False );
   1255 
   1256    return vbits64;
   1257 }
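
/* Illustrative scenario for the exemption above (made-up sizes, 64-bit
   target): an 8-byte aligned load at the last word of a heap block where
   only the first 6 bytes are addressable.  With --partial-loads-ok=yes no
   address error is reported; instead the two out-of-range bytes of the
   result are forced to undefined by the "vbits64 |= pessim64" step, so
   any later use of those bytes is still flagged. */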
   1258 
   1259 
   1260 static
   1261 __attribute__((noinline))
   1262 void mc_STOREVn_slow ( Addr a, SizeT nBits, ULong vbytes, Bool bigendian )
   1263 {
   1264    SizeT szB = nBits / 8;
   1265    SizeT i, n_addrs_bad = 0;
   1266    UChar vbits8;
   1267    Addr  ai;
   1268    Bool  ok;
   1269 
   1270    PROF_EVENT(35, "mc_STOREVn_slow");
   1271 
   1272    /* ------------ BEGIN semi-fast cases ------------ */
   1273    /* These deal quickly-ish with the common auxiliary primary map
    1274       cases on 64-bit platforms.  They are merely a speedup hack and can be
    1275       omitted without loss of correctness/functionality.  Note that in
    1276       both cases the "sizeof(void*) == 8" test causes these cases to be
   1277       folded out by compilers on 32-bit platforms.  These are derived
   1278       from STOREV64 and STOREV32.
   1279    */
   1280    if (LIKELY(sizeof(void*) == 8
   1281                       && nBits == 64 && VG_IS_8_ALIGNED(a))) {
   1282       SecMap* sm       = get_secmap_for_reading(a);
   1283       UWord   sm_off16 = SM_OFF_16(a);
   1284       UWord   vabits16 = ((UShort*)(sm->vabits8))[sm_off16];
   1285       if (LIKELY( !is_distinguished_sm(sm) &&
   1286                           (VA_BITS16_DEFINED   == vabits16 ||
   1287                            VA_BITS16_UNDEFINED == vabits16) )) {
   1288          /* Handle common case quickly: a is suitably aligned, */
   1289          /* is mapped, and is addressable. */
   1290          // Convert full V-bits in register to compact 2-bit form.
   1291          if (LIKELY(V_BITS64_DEFINED == vbytes)) {
   1292             ((UShort*)(sm->vabits8))[sm_off16] = (UShort)VA_BITS16_DEFINED;
   1293             return;
   1294          } else if (V_BITS64_UNDEFINED == vbytes) {
   1295             ((UShort*)(sm->vabits8))[sm_off16] = (UShort)VA_BITS16_UNDEFINED;
   1296             return;
   1297          }
   1298          /* else fall into the slow case */
   1299       }
   1300       /* else fall into the slow case */
   1301    }
   1302    if (LIKELY(sizeof(void*) == 8
   1303                       && nBits == 32 && VG_IS_4_ALIGNED(a))) {
   1304       SecMap* sm      = get_secmap_for_reading(a);
   1305       UWord   sm_off  = SM_OFF(a);
   1306       UWord   vabits8 = sm->vabits8[sm_off];
   1307       if (LIKELY( !is_distinguished_sm(sm) &&
   1308                           (VA_BITS8_DEFINED   == vabits8 ||
   1309                            VA_BITS8_UNDEFINED == vabits8) )) {
   1310          /* Handle common case quickly: a is suitably aligned, */
   1311          /* is mapped, and is addressable. */
   1312          // Convert full V-bits in register to compact 2-bit form.
   1313          if (LIKELY(V_BITS32_DEFINED == (vbytes & 0xFFFFFFFF))) {
   1314             sm->vabits8[sm_off] = VA_BITS8_DEFINED;
   1315             return;
   1316          } else if (V_BITS32_UNDEFINED == (vbytes & 0xFFFFFFFF)) {
   1317             sm->vabits8[sm_off] = VA_BITS8_UNDEFINED;
   1318             return;
   1319          }
   1320          /* else fall into the slow case */
   1321       }
   1322       /* else fall into the slow case */
   1323    }
   1324    /* ------------ END semi-fast cases ------------ */
   1325 
   1326    tl_assert(nBits == 64 || nBits == 32 || nBits == 16 || nBits == 8);
   1327 
   1328    /* Dump vbytes in memory, iterating from least to most significant
   1329       byte.  At the same time establish addressability of the location. */
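           /* (For instance, assuming the usual little/big-endian meaning of
              byte_offset_w: for a 4-byte store on a big-endian target the
              least significant byte (i == 0) is written at a+3 and the most
              significant at a+0, whereas on a little-endian target byte i
              simply goes to a+i.) */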
   1330    for (i = 0; i < szB; i++) {
   1331       PROF_EVENT(36, "mc_STOREVn_slow(loop)");
   1332       ai     = a + byte_offset_w(szB, bigendian, i);
   1333       vbits8 = vbytes & 0xff;
   1334       ok     = set_vbits8(ai, vbits8);
   1335       if (!ok) n_addrs_bad++;
   1336       vbytes >>= 8;
   1337    }
   1338 
   1339    /* If an address error has happened, report it. */
   1340    if (n_addrs_bad > 0)
   1341       MC_(record_address_error)( VG_(get_running_tid)(), a, szB, True );
   1342 }
   1343 
   1344 
   1345 /*------------------------------------------------------------*/
   1346 /*--- Setting permissions over address ranges.             ---*/
   1347 /*------------------------------------------------------------*/
   1348 
   1349 static void set_address_range_perms ( Addr a, SizeT lenT, UWord vabits16,
   1350                                       UWord dsm_num )
   1351 {
   1352    UWord    sm_off, sm_off16;
   1353    UWord    vabits2 = vabits16 & 0x3;
   1354    SizeT    lenA, lenB, len_to_next_secmap;
   1355    Addr     aNext;
   1356    SecMap*  sm;
   1357    SecMap** sm_ptr;
   1358    SecMap*  example_dsm;
   1359 
   1360    PROF_EVENT(150, "set_address_range_perms");
   1361 
   1362    /* Check the V+A bits make sense. */
   1363    tl_assert(VA_BITS16_NOACCESS  == vabits16 ||
   1364              VA_BITS16_UNDEFINED == vabits16 ||
   1365              VA_BITS16_DEFINED   == vabits16);
   1366 
   1367    // This code should never write PDBs;  ensure this.  (See comment above
   1368    // set_vabits2().)
   1369    tl_assert(VA_BITS2_PARTDEFINED != vabits2);
   1370 
   1371    if (lenT == 0)
   1372       return;
   1373 
   1374    if (lenT > 256 * 1024 * 1024) {
   1375       if (VG_(clo_verbosity) > 0 && !VG_(clo_xml)) {
   1376          Char* s = "unknown???";
   1377          if (vabits16 == VA_BITS16_NOACCESS ) s = "noaccess";
   1378          if (vabits16 == VA_BITS16_UNDEFINED) s = "undefined";
   1379          if (vabits16 == VA_BITS16_DEFINED  ) s = "defined";
   1380          VG_(message)(Vg_UserMsg, "Warning: set address range perms: "
   1381                                   "large range [0x%lx, 0x%lx) (%s)\n",
   1382                                   a, a + lenT, s);
   1383       }
   1384    }
   1385 
   1386 #ifndef PERF_FAST_SARP
   1387    /*------------------ debug-only case ------------------ */
   1388    {
   1389       // Endianness doesn't matter here because all bytes are being set to
   1390       // the same value.
   1391       // Nb: We don't have to worry about updating the sec-V-bits table
   1392       // after these set_vabits2() calls because this code never writes
   1393       // VA_BITS2_PARTDEFINED values.
   1394       SizeT i;
   1395       for (i = 0; i < lenT; i++) {
   1396          set_vabits2(a + i, vabits2);
   1397       }
   1398       return;
   1399    }
   1400 #endif
   1401 
   1402    /*------------------ standard handling ------------------ */
   1403 
   1404    /* Get the distinguished secondary that we might want
   1405       to use (part of the space-compression scheme). */
   1406    example_dsm = &sm_distinguished[dsm_num];
   1407 
   1408    // We have to handle ranges covering various combinations of partial and
   1409    // whole sec-maps.  Here is how parts 1, 2 and 3 are used in each case.
   1410    // Cases marked with a '*' are common.
   1411    //
   1412    //   TYPE                                             PARTS USED
   1413    //   ----                                             ----------
   1414    // * one partial sec-map                  (p)         1
   1415    // - one whole sec-map                    (P)         2
   1416    //
   1417    // * two partial sec-maps                 (pp)        1,3
   1418    // - one partial, one whole sec-map       (pP)        1,2
   1419    // - one whole, one partial sec-map       (Pp)        2,3
   1420    // - two whole sec-maps                   (PP)        2,2
   1421    //
   1422    // * one partial, one whole, one partial  (pPp)       1,2,3
   1423    // - one partial, two whole               (pPP)       1,2,2
   1424    // - two whole, one partial               (PPp)       2,2,3
   1425    // - three whole                          (PPP)       2,2,2
   1426    //
   1427    // * one partial, N-2 whole, one partial  (pP...Pp)   1,2...2,3
   1428    // - one partial, N-1 whole               (pP...PP)   1,2...2,2
   1429    // - N-1 whole, one partial               (PP...Pp)   2,2...2,3
   1430    // - N whole                              (PP...PP)   2,2...2,2
   1431 
   1432    // Break up total length (lenT) into two parts:  length in the first
   1433    // sec-map (lenA), and the rest (lenB);   lenT == lenA + lenB.
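           // A worked example (illustration only, using the 64KB sec-map size
           // noted in Part 2 below): for a == 0x10000100 and lenT == 0x30000,
           // the next sec-map boundary is 0x10010000, so lenA == 0xFF00 and
           // lenB == 0x20100.  Part 1 then handles the 0xFF00-byte head,
           // Part 2 replaces the two whole sec-maps, and Part 3 handles the
           // final 0x100-byte tail.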
   1434    aNext = start_of_this_sm(a) + SM_SIZE;
   1435    len_to_next_secmap = aNext - a;
   1436    if ( lenT <= len_to_next_secmap ) {
   1437       // Range entirely within one sec-map.  Covers almost all cases.
   1438       PROF_EVENT(151, "set_address_range_perms-single-secmap");
   1439       lenA = lenT;
   1440       lenB = 0;
   1441    } else if (is_start_of_sm(a)) {
   1442       // Range spans at least one whole sec-map, and starts at the beginning
   1443       // of a sec-map; skip to Part 2.
   1444       PROF_EVENT(152, "set_address_range_perms-startof-secmap");
   1445       lenA = 0;
   1446       lenB = lenT;
   1447       goto part2;
   1448    } else {
   1449       // Range spans two or more sec-maps, first one is partial.
   1450       PROF_EVENT(153, "set_address_range_perms-multiple-secmaps");
   1451       lenA = len_to_next_secmap;
   1452       lenB = lenT - lenA;
   1453    }
   1454 
   1455    //------------------------------------------------------------------------
   1456    // Part 1: Deal with the first sec_map.  Most of the time the range will be
   1457    // entirely within a sec_map and this part alone will suffice.  Also,
   1458    // doing it this way lets us avoid repeatedly testing for the crossing of
   1459    // a sec-map boundary within these loops.
   1460    //------------------------------------------------------------------------
   1461 
   1462    // If it's distinguished, make it undistinguished if necessary.
   1463    sm_ptr = get_secmap_ptr(a);
   1464    if (is_distinguished_sm(*sm_ptr)) {
   1465       if (*sm_ptr == example_dsm) {
   1466          // Sec-map already has the V+A bits that we want, so skip.
   1467          PROF_EVENT(154, "set_address_range_perms-dist-sm1-quick");
   1468          a    = aNext;
   1469          lenA = 0;
   1470       } else {
   1471          PROF_EVENT(155, "set_address_range_perms-dist-sm1");
   1472          *sm_ptr = copy_for_writing(*sm_ptr);
   1473       }
   1474    }
   1475    sm = *sm_ptr;
   1476 
   1477    // 1 byte steps
   1478    while (True) {
   1479       if (VG_IS_8_ALIGNED(a)) break;
   1480       if (lenA < 1)           break;
   1481       PROF_EVENT(156, "set_address_range_perms-loop1a");
   1482       sm_off = SM_OFF(a);
   1483       insert_vabits2_into_vabits8( a, vabits2, &(sm->vabits8[sm_off]) );
   1484       a    += 1;
   1485       lenA -= 1;
   1486    }
   1487    // 8-aligned, 8 byte steps
   1488    while (True) {
   1489       if (lenA < 8) break;
   1490       PROF_EVENT(157, "set_address_range_perms-loop8a");
   1491       sm_off16 = SM_OFF_16(a);
   1492       ((UShort*)(sm->vabits8))[sm_off16] = vabits16;
   1493       a    += 8;
   1494       lenA -= 8;
   1495    }
   1496    // 1 byte steps
   1497    while (True) {
   1498       if (lenA < 1) break;
   1499       PROF_EVENT(158, "set_address_range_perms-loop1b");
   1500       sm_off = SM_OFF(a);
   1501       insert_vabits2_into_vabits8( a, vabits2, &(sm->vabits8[sm_off]) );
   1502       a    += 1;
   1503       lenA -= 1;
   1504    }
   1505 
   1506    // We've finished the first sec-map.  Is that it?
   1507    if (lenB == 0)
   1508       return;
   1509 
   1510    //------------------------------------------------------------------------
   1511    // Part 2: Fast-set entire sec-maps at a time.
   1512    //------------------------------------------------------------------------
   1513   part2:
   1514    // 64KB-aligned, 64KB steps.
   1515    // Nb: we can reach here with lenB < SM_SIZE
   1516    tl_assert(0 == lenA);
   1517    while (True) {
   1518       if (lenB < SM_SIZE) break;
   1519       tl_assert(is_start_of_sm(a));
   1520       PROF_EVENT(159, "set_address_range_perms-loop64K");
   1521       sm_ptr = get_secmap_ptr(a);
   1522       if (!is_distinguished_sm(*sm_ptr)) {
   1523          PROF_EVENT(160, "set_address_range_perms-loop64K-free-dist-sm");
   1524          // Free the non-distinguished sec-map that we're replacing.  This
   1525          // case happens moderately often, enough to be worthwhile.
   1526          SysRes sres = VG_(am_munmap_valgrind)((Addr)*sm_ptr, sizeof(SecMap));
   1527          tl_assert2(! sr_isError(sres), "SecMap valgrind munmap failure\n");
   1528       }
   1529       update_SM_counts(*sm_ptr, example_dsm);
   1530       // Make the sec-map entry point to the example DSM
   1531       *sm_ptr = example_dsm;
   1532       lenB -= SM_SIZE;
   1533       a    += SM_SIZE;
   1534    }
   1535 
   1536    // We've finished the whole sec-maps.  Is that it?
   1537    if (lenB == 0)
   1538       return;
   1539 
   1540    //------------------------------------------------------------------------
   1541    // Part 3: Finish off the final partial sec-map, if necessary.
   1542    //------------------------------------------------------------------------
   1543 
   1544    tl_assert(is_start_of_sm(a) && lenB < SM_SIZE);
   1545 
   1546    // If it's distinguished, make it undistinguished if necessary.
   1547    sm_ptr = get_secmap_ptr(a);
   1548    if (is_distinguished_sm(*sm_ptr)) {
   1549       if (*sm_ptr == example_dsm) {
   1550          // Sec-map already has the V+A bits that we want, so stop.
   1551          PROF_EVENT(161, "set_address_range_perms-dist-sm2-quick");
   1552          return;
   1553       } else {
   1554          PROF_EVENT(162, "set_address_range_perms-dist-sm2");
   1555          *sm_ptr = copy_for_writing(*sm_ptr);
   1556       }
   1557    }
   1558    sm = *sm_ptr;
   1559 
   1560    // 8-aligned, 8 byte steps
   1561    while (True) {
   1562       if (lenB < 8) break;
   1563       PROF_EVENT(163, "set_address_range_perms-loop8b");
   1564       sm_off16 = SM_OFF_16(a);
   1565       ((UShort*)(sm->vabits8))[sm_off16] = vabits16;
   1566       a    += 8;
   1567       lenB -= 8;
   1568    }
   1569    // 1 byte steps
   1570    while (True) {
   1571       if (lenB < 1) return;
   1572       PROF_EVENT(164, "set_address_range_perms-loop1c");
   1573       sm_off = SM_OFF(a);
   1574       insert_vabits2_into_vabits8( a, vabits2, &(sm->vabits8[sm_off]) );
   1575       a    += 1;
   1576       lenB -= 1;
   1577    }
   1578 }
   1579 
   1580 
   1581 /* --- Set permissions for arbitrary address ranges --- */
   1582 
   1583 void MC_(make_mem_noaccess) ( Addr a, SizeT len )
   1584 {
   1585    PROF_EVENT(40, "MC_(make_mem_noaccess)");
   1586    DEBUG("MC_(make_mem_noaccess)(%p, %lu)\n", a, len);
   1587    set_address_range_perms ( a, len, VA_BITS16_NOACCESS, SM_DIST_NOACCESS );
   1588    if (UNLIKELY( MC_(clo_mc_level) == 3 ))
   1589       ocache_sarp_Clear_Origins ( a, len );
   1590 }
   1591 
   1592 static void make_mem_undefined ( Addr a, SizeT len )
   1593 {
   1594    PROF_EVENT(41, "make_mem_undefined");
   1595    DEBUG("make_mem_undefined(%p, %lu)\n", a, len);
   1596    set_address_range_perms ( a, len, VA_BITS16_UNDEFINED, SM_DIST_UNDEFINED );
   1597 }
   1598 
   1599 void MC_(make_mem_undefined_w_otag) ( Addr a, SizeT len, UInt otag )
   1600 {
   1601    PROF_EVENT(41, "MC_(make_mem_undefined)");
   1602    DEBUG("MC_(make_mem_undefined)(%p, %lu)\n", a, len);
   1603    set_address_range_perms ( a, len, VA_BITS16_UNDEFINED, SM_DIST_UNDEFINED );
   1604    if (UNLIKELY( MC_(clo_mc_level) == 3 ))
   1605       ocache_sarp_Set_Origins ( a, len, otag );
   1606 }
   1607 
   1608 static
   1609 void make_mem_undefined_w_tid_and_okind ( Addr a, SizeT len,
   1610                                           ThreadId tid, UInt okind )
   1611 {
   1612    UInt        ecu;
   1613    ExeContext* here;
   1614    /* VG_(record_ExeContext) checks for validity of tid, and asserts
   1615       if it is invalid.  So no need to do it here. */
   1616    tl_assert(okind <= 3);
   1617    here = VG_(record_ExeContext)( tid, 0/*first_ip_delta*/ );
   1618    tl_assert(here);
   1619    ecu = VG_(get_ECU_from_ExeContext)(here);
   1620    tl_assert(VG_(is_plausible_ECU)(ecu));
   1621    MC_(make_mem_undefined_w_otag) ( a, len, ecu | okind );
   1622 }
   1623 
   1624 static
   1625 void make_mem_undefined_w_tid ( Addr a, SizeT len, ThreadId tid ) {
   1626    make_mem_undefined_w_tid_and_okind ( a, len, tid, MC_OKIND_UNKNOWN );
   1627 }
   1628 
   1629 
   1630 void MC_(make_mem_defined) ( Addr a, SizeT len )
   1631 {
   1632    PROF_EVENT(42, "MC_(make_mem_defined)");
   1633    DEBUG("MC_(make_mem_defined)(%p, %lu)\n", a, len);
   1634    set_address_range_perms ( a, len, VA_BITS16_DEFINED, SM_DIST_DEFINED );
   1635    if (UNLIKELY( MC_(clo_mc_level) == 3 ))
   1636       ocache_sarp_Clear_Origins ( a, len );
   1637 }
   1638 
   1639 /* For each byte in [a,a+len), if the byte is addressable, make it be
   1640    defined, but if it isn't addressable, leave it alone.  In other
   1641    words, a version of MC_(make_mem_defined) that doesn't mess with
   1642    addressability.  Low-performance implementation. */
   1643 static void make_mem_defined_if_addressable ( Addr a, SizeT len )
   1644 {
   1645    SizeT i;
   1646    UChar vabits2;
   1647    DEBUG("make_mem_defined_if_addressable(%p, %llu)\n", a, (ULong)len);
   1648    for (i = 0; i < len; i++) {
   1649       vabits2 = get_vabits2( a+i );
   1650       if (LIKELY(VA_BITS2_NOACCESS != vabits2)) {
   1651          set_vabits2(a+i, VA_BITS2_DEFINED);
   1652          if (UNLIKELY(MC_(clo_mc_level) >= 3)) {
   1653             MC_(helperc_b_store1)( a+i, 0 ); /* clear the origin tag */
   1654          }
   1655       }
   1656    }
   1657 }
   1658 
   1659 /* Similarly, but only for bytes that are currently noaccess (needed for mprotect handling). */
   1660 static void make_mem_defined_if_noaccess ( Addr a, SizeT len )
   1661 {
   1662    SizeT i;
   1663    UChar vabits2;
   1664    DEBUG("make_mem_defined_if_noaccess(%p, %llu)\n", a, (ULong)len);
   1665    for (i = 0; i < len; i++) {
   1666       vabits2 = get_vabits2( a+i );
   1667       if (LIKELY(VA_BITS2_NOACCESS == vabits2)) {
   1668          set_vabits2(a+i, VA_BITS2_DEFINED);
   1669          if (UNLIKELY(MC_(clo_mc_level) >= 3)) {
   1670             MC_(helperc_b_store1)( a+i, 0 ); /* clear the origin tag */
   1671          }
   1672       }
   1673    }
   1674 }
   1675 
   1676 /* --- Block-copy permissions (needed for implementing realloc() and
   1677        sys_mremap). --- */
   1678 
   1679 void MC_(copy_address_range_state) ( Addr src, Addr dst, SizeT len )
   1680 {
   1681    SizeT i, j;
   1682    UChar vabits2, vabits8;
   1683    Bool  aligned, nooverlap;
   1684 
   1685    DEBUG("MC_(copy_address_range_state)\n");
   1686    PROF_EVENT(50, "MC_(copy_address_range_state)");
   1687 
   1688    if (len == 0 || src == dst)
   1689       return;
   1690 
   1691    aligned   = VG_IS_4_ALIGNED(src) && VG_IS_4_ALIGNED(dst);
   1692    nooverlap = src+len <= dst || dst+len <= src;
   1693 
   1694    if (nooverlap && aligned) {
   1695 
   1696       /* Vectorised fast case, when no overlap and suitably aligned */
   1697       /* vector loop */
   1698       i = 0;
   1699       while (len >= 4) {
   1700          vabits8 = get_vabits8_for_aligned_word32( src+i );
   1701          set_vabits8_for_aligned_word32( dst+i, vabits8 );
   1702          if (LIKELY(VA_BITS8_DEFINED == vabits8
   1703                             || VA_BITS8_UNDEFINED == vabits8
   1704                             || VA_BITS8_NOACCESS == vabits8)) {
   1705             /* do nothing */
   1706          } else {
   1707             /* have to copy secondary map info */
   1708             if (VA_BITS2_PARTDEFINED == get_vabits2( src+i+0 ))
   1709                set_sec_vbits8( dst+i+0, get_sec_vbits8( src+i+0 ) );
   1710             if (VA_BITS2_PARTDEFINED == get_vabits2( src+i+1 ))
   1711                set_sec_vbits8( dst+i+1, get_sec_vbits8( src+i+1 ) );
   1712             if (VA_BITS2_PARTDEFINED == get_vabits2( src+i+2 ))
   1713                set_sec_vbits8( dst+i+2, get_sec_vbits8( src+i+2 ) );
   1714             if (VA_BITS2_PARTDEFINED == get_vabits2( src+i+3 ))
   1715                set_sec_vbits8( dst+i+3, get_sec_vbits8( src+i+3 ) );
   1716          }
   1717          i += 4;
   1718          len -= 4;
   1719       }
   1720       /* fixup loop */
   1721       while (len >= 1) {
   1722          vabits2 = get_vabits2( src+i );
   1723          set_vabits2( dst+i, vabits2 );
   1724          if (VA_BITS2_PARTDEFINED == vabits2) {
   1725             set_sec_vbits8( dst+i, get_sec_vbits8( src+i ) );
   1726          }
   1727          i++;
   1728          len--;
   1729       }
   1730 
   1731    } else {
   1732 
   1733       /* We have to do things the slow way */
   1734       if (src < dst) {
   1735          for (i = 0, j = len-1; i < len; i++, j--) {
   1736             PROF_EVENT(51, "MC_(copy_address_range_state)(loop)");
   1737             vabits2 = get_vabits2( src+j );
   1738             set_vabits2( dst+j, vabits2 );
   1739             if (VA_BITS2_PARTDEFINED == vabits2) {
   1740                set_sec_vbits8( dst+j, get_sec_vbits8( src+j ) );
   1741             }
   1742          }
   1743       }
   1744 
   1745       if (src > dst) {
   1746          for (i = 0; i < len; i++) {
   1747             PROF_EVENT(52, "MC_(copy_address_range_state)(loop)");
   1748             vabits2 = get_vabits2( src+i );
   1749             set_vabits2( dst+i, vabits2 );
   1750             if (VA_BITS2_PARTDEFINED == vabits2) {
   1751                set_sec_vbits8( dst+i, get_sec_vbits8( src+i ) );
   1752             }
   1753          }
   1754       }
   1755    }
   1756 
   1757 }
   1758 
   1759 
   1760 /*------------------------------------------------------------*/
   1761 /*--- Origin tracking stuff - cache basics                 ---*/
   1762 /*------------------------------------------------------------*/
   1763 
   1764 /* AN OVERVIEW OF THE ORIGIN TRACKING IMPLEMENTATION
   1765    ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
   1766 
   1767    Note that this implementation draws inspiration from the "origin
   1768    tracking by value piggybacking" scheme described in "Tracking Bad
   1769    Apples: Reporting the Origin of Null and Undefined Value Errors"
   1770    (Michael Bond, Nicholas Nethercote, Stephen Kent, Samuel Guyer,
   1771    Kathryn McKinley, OOPSLA07, Montreal, Oct 2007) but in fact it is
   1772    implemented completely differently.
   1773 
   1774    Origin tags and ECUs -- about the shadow values
   1775    ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
   1776 
   1777    This implementation tracks the defining point of all uninitialised
   1778    values using so called "origin tags", which are 32-bit integers,
   1779    rather than using the values themselves to encode the origins.  The
   1780    latter, so-called "value piggybacking", is what the OOPSLA07 paper
   1781    describes.
   1782 
   1783    Origin tags, as tracked by the machinery below, are 32-bit unsigned
   1784    ints (UInts), regardless of the machine's word size.  Each tag
   1785    comprises an upper 30-bit ECU field and a lower 2-bit
   1786    'kind' field.  The ECU field is a number given out by m_execontext
   1787    and has a 1-1 mapping with ExeContext*s.  An ECU can be used
   1788    directly as an origin tag (otag), but in fact we want to put
   1789    additional information in the 'kind' field to indicate roughly where the
   1790    tag came from.  This helps print more understandable error messages
   1791    for the user -- it has no other purpose.  In summary:
   1792 
   1793    * Both ECUs and origin tags are represented as 32-bit words
   1794 
   1795    * m_execontext and the core-tool interface deal purely in ECUs.
   1796      They have no knowledge of origin tags - that is a purely
   1797      Memcheck-internal matter.
   1798 
   1799    * all valid ECUs have the lowest 2 bits zero and at least
   1800      one of the upper 30 bits nonzero (see VG_(is_plausible_ECU))
   1801 
   1802    * to convert from an ECU to an otag, OR in one of the MC_OKIND_
   1803      constants defined in mc_include.h.
   1804 
   1805    * to convert an otag back to an ECU, AND it with ~3 (see the sketch below)
   1806 
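           A minimal sketch of those conversions (an added illustration;
           'ec' stands for a hypothetical ExeContext*, and MC_OKIND_STACK is
           one of the MC_OKIND_ constants from mc_include.h):

              UInt ecu  = VG_(get_ECU_from_ExeContext)( ec );
              UInt otag = ecu | MC_OKIND_STACK;    // ECU -> otag
              UInt back = otag & ~3;               // otag -> ECU
              tl_assert(back == ecu && VG_(is_plausible_ECU)(ecu));
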
   1807    One important fact is that no valid otag is zero.  A zero otag is
   1808    used by the implementation to indicate "no origin", which could
   1809    mean that either the value is defined, or it is undefined but the
   1810    implementation somehow managed to lose the origin.
   1811 
   1812    The ECU used for memory created by malloc etc is derived from the
   1813    stack trace at the time the malloc etc happens.  This means the
   1814    mechanism can show the exact allocation point for heap-created
   1815    uninitialised values.
   1816 
   1817    In contrast, it is simply too expensive to create a complete
   1818    backtrace for each stack allocation.  Therefore we merely use a
   1819    depth-1 backtrace for stack allocations, which can be done once at
   1820    translation time, rather than N times at run time.  The result of
   1821    this is that, for stack created uninitialised values, Memcheck can
   1822    only show the allocating function, and not what called it.
   1823    Furthermore, compilers tend to move the stack pointer just once at
   1824    the start of the function, to allocate all locals, and so in fact
   1825    the stack origin almost always simply points to the opening brace
   1826    of the function.  Net result is, for stack origins, the mechanism
   1827    can tell you in which function the undefined value was created, but
   1828    that's all.  Users will need to carefully check all locals in the
   1829    specified function.
   1830 
   1831    Shadowing registers and memory
   1832    ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
   1833 
   1834    Memory is shadowed using a two level cache structure (ocacheL1 and
   1835    ocacheL2).  Memory references are first directed to ocacheL1.  This
   1836    is a traditional 2-way set associative cache with 32-byte lines and
   1837    approximate LRU replacement within each set.
   1838 
   1839    A naive implementation would require storing one 32 bit otag for
   1840    each byte of memory covered, a 4:1 space overhead.  Instead, there
   1841    is one otag for every 4 bytes of memory covered, plus a 4-bit mask
   1842    that shows which of the 4 bytes have that shadow value and which
   1843    have a shadow value of zero (indicating no origin).  Hence a lot of
   1844    space is saved, but the cost is that only one different origin per
   1845    4 bytes of address space can be represented.  This is a source of
   1846    imprecision, but how much of a problem it really is remains to be
   1847    seen.
   1848 
   1849    A cache line that contains all zeroes ("no origins") contains no
   1850    useful information, and can be ejected from the L1 cache "for
   1851    free", in the sense that a read miss on the L1 causes a line of
   1852    zeroes to be installed.  However, ejecting a line containing
   1853    nonzeroes risks losing origin information permanently.  In order to
   1854    prevent such lossage, ejected nonzero lines are placed in a
   1855    secondary cache (ocacheL2), which is an OSet (AVL tree) of cache
   1856    lines.  This can grow arbitrarily large, and so should ensure that
   1857    Memcheck runs out of memory in preference to losing useful origin
   1858    info due to cache size limitations.
   1859 
   1860    Shadowing registers is a bit tricky, because the shadow values are
   1861    32 bits, regardless of the size of the register.  That gives a
   1862    problem for registers smaller than 32 bits.  The solution is to
   1863    find spaces in the guest state that are unused, and use those to
   1864    shadow guest state fragments smaller than 32 bits.  For example, on
   1865    ppc32/64, each vector register is 16 bytes long.  If 4 bytes of the
   1866    shadow are allocated for the register's otag, then there are still
   1867    12 bytes left over which could be used to shadow 3 other values.
   1868 
   1869    This implies there is some non-obvious mapping from guest state
   1870    (start,length) pairs to the relevant shadow offset (for the origin
   1871    tags).  And it is unfortunately guest-architecture specific.  The
   1872    mapping is contained in mc_machine.c, which is quite lengthy but
   1873    straightforward.
   1874 
   1875    Instrumenting the IR
   1876    ~~~~~~~~~~~~~~~~~~~~
   1877 
   1878    Instrumentation is largely straightforward, and done by the
   1879    functions schemeE and schemeS in mc_translate.c.  These generate
   1880    code for handling the origin tags of expressions (E) and statements
   1881    (S) respectively.  The rather strange names are a reference to the
   1882    "compilation schemes" shown in Simon Peyton Jones' book "The
   1883    Implementation of Functional Programming Languages" (Prentice Hall,
   1884    1987, see
   1885    http://research.microsoft.com/~simonpj/papers/slpj-book-1987/index.htm).
   1886 
   1887    schemeS merely arranges to move shadow values around the guest
   1888    state to track the incoming IR.  schemeE is largely trivial too.
   1889    The only significant point is how to compute the otag corresponding
   1890    to binary (or ternary, quaternary, etc) operator applications.  The
   1891    rule is simple: just take whichever value is larger (32-bit
   1892    unsigned max).  Constants get the special value zero.  Hence this
   1893    rule always propagates a nonzero (known) otag in preference to a
   1894    zero (unknown, or more likely, value-is-defined) tag, as we want.
   1895    If two different undefined values are inputs to a binary operator
   1896    application, then which is propagated is arbitrary, but that
   1897    doesn't matter, since the program is erroneous in using either of
   1898    the values, and so there's no point in attempting to propagate
   1899    both.
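
           (As a sketch, the combination rule amounts to nothing more than

               UInt otag_out = (otag_x > otag_y) ? otag_x : otag_y;  // Max32U

            where otag_x and otag_y are the operands' origin tags; a zero
            operand -- a constant, or a value believed to be defined -- never
            displaces a nonzero one.)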
   1900 
   1901    Since constants are abstracted to (otag) zero, much of the
   1902    instrumentation code can be folded out without difficulty by the
   1903    generic post-instrumentation IR cleanup pass, using these rules:
   1904    Max32U(0,x) -> x, Max32U(x,0) -> x, and Max32U(x,y), where x and y
   1905    are constants, is evaluated at JIT time; the resulting dead code is
   1906    then removed.  In practice this causes surprisingly few Max32Us to
   1907    survive through to backend code generation.
   1908 
   1909    Integration with the V-bits machinery
   1910    ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
   1911 
   1912    This is again largely straightforward.  Mostly the otag and V bits
   1913    stuff are independent.  The only point of interaction is when the V
   1914    bits instrumenter creates a call to a helper function to report an
   1915    uninitialised value error -- in that case it must first use schemeE
   1916    to get hold of the origin tag expression for the value, and pass
   1917    that to the helper too.
   1918 
   1919    There is the usual stuff to do with setting address range
   1920    permissions.  When memory is painted undefined, we must also know
   1921    the origin tag to paint with, which involves some tedious plumbing,
   1922    particularly to do with the fast case stack handlers.  When memory
   1923    is painted defined or noaccess then the origin tags must be forced
   1924    to zero.
   1925 
   1926    One of the goals of the implementation was to ensure that the
   1927    non-origin tracking mode isn't slowed down at all.  To do this,
   1928    various functions to do with memory permissions setting (again,
   1929    mostly pertaining to the stack) are duplicated for the with- and
   1930    without-otag case.
   1931 
   1932    Dealing with stack redzones, and the NIA cache
   1933    ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
   1934 
   1935    This is one of the few non-obvious parts of the implementation.
   1936 
   1937    Some ABIs (amd64-ELF, ppc64-ELF, ppc32/64-XCOFF) define a small
   1938    reserved area below the stack pointer, that can be used as scratch
   1939    space by compiler generated code for functions.  In the Memcheck
   1940    sources this is referred to as the "stack redzone".  The important
   1941    thing here is that such redzones are considered volatile across
   1942    function calls and returns.  So Memcheck takes care to mark them as
   1943    undefined for each call and return, on the afflicted platforms.
   1944    Past experience shows this is essential in order to get reliable
   1945    messages about uninitialised values that come from the stack.
   1946 
   1947    So the question is, when we paint a redzone undefined, what origin
   1948    tag should we use for it?  Consider a function f() calling g().  If
   1949    we paint the redzone using an otag derived from the ExeContext of
   1950    the CALL/BL instruction in f, then any errors in g causing it to
   1951    use uninitialised values that happen to lie in the redzone, will be
   1952    reported as having their origin in f.  Which is highly confusing.
   1953 
   1954    The same applies for returns: if, on a return, we paint the redzone
   1955    using an origin tag derived from the ExeContext of the RET/BLR
   1956    instruction in g, then any later errors in f causing it to use
   1957    uninitialised values in the redzone, will be reported as having
   1958    their origin in g.  Which is just as confusing.
   1959 
   1960    To do it right, in both cases we need to use an origin tag which
   1961    pertains to the instruction which dynamically follows the CALL/BL
   1962    or RET/BLR.  In short, one derived from the NIA - the "next
   1963    instruction address".
   1964 
   1965    To make this work, Memcheck's redzone-painting helper,
   1966    MC_(helperc_MAKE_STACK_UNINIT), now takes a third argument, the
   1967    NIA.  It converts the NIA to a 1-element ExeContext, and uses that
   1968    ExeContext's ECU as the basis for the otag used to paint the
   1969    redzone.  The expensive part of this is converting an NIA into an
   1970    ECU, since this happens once for every call and every return.  So
   1971    we use a simple 511-line, 2-way set associative cache
   1972    (nia_to_ecu_cache) to cache the mappings, and that knocks most of
   1973    the cost out.
   1974 
   1975    Further background comments
   1976    ~~~~~~~~~~~~~~~~~~~~~~~~~~~
   1977 
   1978    > Question: why is otag a UInt?  Wouldn't a UWord be better?  Isn't
   1979    > it really just the address of the relevant ExeContext?
   1980 
   1981    Well, it's not the address, but a value which has a 1-1 mapping
   1982    with ExeContexts, and is guaranteed not to be zero, since zero
   1983    denotes (to memcheck) "unknown origin or defined value".  So these
   1984    UInts are just numbers starting at 4 and incrementing by 4; each
   1985    ExeContext is given a number when it is created.  (*** NOTE this
   1986    confuses otags and ECUs; see comments above ***).
   1987 
   1988    Making these otags 32-bit regardless of the machine's word size
   1989    makes the 64-bit implementation easier (next para).  And it doesn't
   1990    really limit us in any way, since for the tags to overflow would
   1991    require that the program somehow caused 2^30-1 different
   1992    ExeContexts to be created, in which case it is probably in deep
   1993    trouble.  Not to mention V will have soaked up many tens of
   1994    gigabytes of memory merely to store them all.
   1995 
   1996    So having 64-bit origins doesn't really buy you anything, and has
   1997    the following downsides:
   1998 
   1999    Suppose that instead, an otag is a UWord.  This would mean that, on
   2000    a 64-bit target,
   2001 
   2002    1. It becomes hard to shadow any element of guest state which is
   2003       smaller than 8 bytes.  To do so means you'd need to find some
   2004       8-byte-sized hole in the guest state which you don't want to
   2005       shadow, and use that instead to hold the otag.  On ppc64, the
   2006       condition code register(s) are split into 20 UChar sized pieces,
   2007       all of which need to be tracked (guest_XER_SO .. guest_CR7_0)
   2008       and so that would entail finding 160 bytes somewhere else in the
   2009       guest state.
   2010 
   2011       Even on x86, I want to track origins for %AH .. %DH (bits 15:8
   2012       of %EAX .. %EDX) that are separate from %AL .. %DL (bits 7:0 of
   2013       same) and so I had to look for 4 untracked otag-sized areas in
   2014       the guest state to make that possible.
   2015 
   2016       The same problem exists of course when origin tags are only 32
   2017       bits, but it's less extreme.
   2018 
   2019    2. (More compelling) it doubles the size of the origin shadow
   2020       memory.  Given that the shadow memory is organised as a fixed
   2021       size cache, and that accuracy of tracking is limited by origins
   2022       falling out the cache due to space conflicts, this isn't good.
   2023 
   2024    > Another question: is the origin tracking perfect, or are there
   2025    > cases where it fails to determine an origin?
   2026 
   2027    It is imperfect for at least the following reasons, and
   2028    probably more:
   2029 
   2030    * Insufficient capacity in the origin cache.  When a line is
   2031      evicted from the cache it is gone forever, and so subsequent
   2032      queries for the line produce zero, indicating no origin
   2033      information.  Interestingly, a line containing all zeroes can be
   2034      evicted "free" from the cache, since it contains no useful
   2035      information, so there is scope perhaps for some cleverer cache
   2036      management schemes.  (*** NOTE, with the introduction of the
   2037      second level origin tag cache, ocacheL2, this is no longer a
   2038      problem. ***)
   2039 
   2040    * The origin cache only stores one otag per 32-bits of address
   2041      space, plus 4 bits indicating which of the 4 bytes has that tag
   2042      and which are considered defined.  The result is that if two
   2043      undefined bytes in the same word are stored in memory, the first
   2044      stored byte's origin will be lost and replaced by the origin for
   2045      the second byte.
   2046 
   2047    * Nonzero origin tags for defined values.  Consider a binary
   2048      operator application op(x,y).  Suppose y is undefined (and so has
   2049      a valid nonzero origin tag), and x is defined, but erroneously
   2050      has a nonzero origin tag (defined values should have tag zero).
   2051      If the erroneous tag has a numeric value greater than y's tag,
   2052      then the rule for propagating origin tags through binary
   2053      operations, which is simply to take the unsigned max of the two
   2054      tags, will erroneously propagate x's tag rather than y's.
   2055 
   2056    * Some obscure uses of x86/amd64 byte registers can cause lossage
   2057      or confusion of origins.  %AH .. %DH are treated as different
   2058      from, and unrelated to, their parent registers, %EAX .. %EDX.
   2059      So some weird sequences like
   2060 
   2061         movb undefined-value, %AH
   2062         movb defined-value, %AL
   2063         .. use %AX or %EAX ..
   2064 
   2065      will cause the origin attributed to %AH to be ignored, since %AL,
   2066      %AX, %EAX are treated as the same register, and %AH as a
   2067      completely separate one.
   2068 
   2069    But having said all that, it actually seems to work fairly well in
   2070    practice.
   2071 */
   2072 
   2073 static UWord stats_ocacheL1_find           = 0;
   2074 static UWord stats_ocacheL1_found_at_1     = 0;
   2075 static UWord stats_ocacheL1_found_at_N     = 0;
   2076 static UWord stats_ocacheL1_misses         = 0;
   2077 static UWord stats_ocacheL1_lossage        = 0;
   2078 static UWord stats_ocacheL1_movefwds       = 0;
   2079 
   2080 static UWord stats__ocacheL2_refs          = 0;
   2081 static UWord stats__ocacheL2_misses        = 0;
   2082 static UWord stats__ocacheL2_n_nodes_max   = 0;
   2083 
   2084 /* Cache of 32-bit values, one every 32 bits of address space */
   2085 
   2086 #define OC_BITS_PER_LINE 5
   2087 #define OC_W32S_PER_LINE (1 << (OC_BITS_PER_LINE - 2))
   2088 
   2089 static INLINE UWord oc_line_offset ( Addr a ) {
   2090    return (a >> 2) & (OC_W32S_PER_LINE - 1);
   2091 }
   2092 static INLINE Bool is_valid_oc_tag ( Addr tag ) {
   2093    return 0 == (tag & ((1 << OC_BITS_PER_LINE) - 1));
   2094 }
   2095 
   2096 #define OC_LINES_PER_SET 2
   2097 
   2098 #define OC_N_SET_BITS    20
   2099 #define OC_N_SETS        (1 << OC_N_SET_BITS)
   2100 
   2101 /* These settings give:
   2102    64 bit host: ocache:  100,663,296 sizeB    67,108,864 useful
   2103    32 bit host: ocache:   92,274,688 sizeB    67,108,864 useful
   2104 */
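
        /* Sketch of where those numbers come from, assuming the natural
           layout of the OCacheLine struct below: a line holds one Addr tag,
           OC_W32S_PER_LINE (8) UInt otags and 8 UChar descrs, i.e.
           8 + 32 + 8 = 48 bytes on a 64-bit host (4 + 32 + 8 = 44 on 32-bit).
           With OC_N_SETS (2^20) sets of OC_LINES_PER_SET (2) lines, that is
           2^21 * 48 = 100,663,296 bytes (2^21 * 44 = 92,274,688 on 32-bit),
           of which the "useful" part -- the 32 bytes of address space each
           line covers -- totals 2^21 * 32 = 67,108,864. */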
   2105 
   2106 #define OC_MOVE_FORWARDS_EVERY_BITS 7
   2107 
   2108 
   2109 typedef
   2110    struct {
   2111       Addr  tag;
   2112       UInt  w32[OC_W32S_PER_LINE];
   2113       UChar descr[OC_W32S_PER_LINE];
   2114    }
   2115    OCacheLine;
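
        /* How the descr mask is meant to be read -- a sketch (not a function
           that exists in this file) mirroring the byte-granularity origin
           helpers used elsewhere: bit j of descr[i] says whether byte j of
           the 4-byte group shadowed by w32[i] actually carries that origin
           tag, or has no origin at all.

              static UInt get_byte_otag_sketch ( OCacheLine* line, Addr a )
              {
                 UWord lineoff = oc_line_offset(a);  // which w32 in the line
                 UWord byteoff = a & 3;              // which byte within it
                 if (line->descr[lineoff] & (1 << byteoff))
                    return line->w32[lineoff];
                 return 0;  // no origin recorded for this byte
              }
        */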
   2116 
   2117 /* Classify and also sanity-check 'line'.  Return 'e' (empty) if not
   2118    in use, 'n' (nonzero) if it contains at least one valid origin tag,
   2119    and 'z' if all the represented tags are zero. */
   2120 static UChar classify_OCacheLine ( OCacheLine* line )
   2121 {
   2122    UWord i;
   2123    if (line->tag == 1/*invalid*/)
   2124       return 'e'; /* EMPTY */
   2125    tl_assert(is_valid_oc_tag(line->tag));
   2126    for (i = 0; i < OC_W32S_PER_LINE; i++) {
   2127       tl_assert(0 == ((~0xF) & line->descr[i]));
   2128       if (line->w32[i] > 0 && line->descr[i] > 0)
   2129          return 'n'; /* NONZERO - contains useful info */
   2130    }
   2131    return 'z'; /* ZERO - no useful info */
   2132 }
   2133 
   2134 typedef
   2135    struct {
   2136       OCacheLine line[OC_LINES_PER_SET];
   2137    }
   2138    OCacheSet;
   2139 
   2140 typedef
   2141    struct {
   2142       OCacheSet set[OC_N_SETS];
   2143    }
   2144    OCache;
   2145 
   2146 static OCache* ocacheL1 = NULL;
   2147 static UWord   ocacheL1_event_ctr = 0;
   2148 
   2149 static void init_ocacheL2 ( void ); /* fwds */
   2150 static void init_OCache ( void )
   2151 {
   2152    UWord line, set;
   2153    tl_assert(MC_(clo_mc_level) >= 3);
   2154    tl_assert(ocacheL1 == NULL);
   2155    ocacheL1 = VG_(am_shadow_alloc)(sizeof(OCache));
   2156    if (ocacheL1 == NULL) {
   2157       VG_(out_of_memory_NORETURN)( "memcheck:allocating ocacheL1",
   2158                                    sizeof(OCache) );
   2159    }
   2160    tl_assert(ocacheL1 != NULL);
   2161    for (set = 0; set < OC_N_SETS; set++) {
   2162       for (line = 0; line < OC_LINES_PER_SET; line++) {
   2163          ocacheL1->set[set].line[line].tag = 1/*invalid*/;
   2164       }
   2165    }
   2166    init_ocacheL2();
   2167 }
   2168 
   2169 static void moveLineForwards ( OCacheSet* set, UWord lineno )
   2170 {
   2171    OCacheLine tmp;
   2172    stats_ocacheL1_movefwds++;
   2173    tl_assert(lineno > 0 && lineno < OC_LINES_PER_SET);
   2174    tmp = set->line[lineno-1];
   2175    set->line[lineno-1] = set->line[lineno];
   2176    set->line[lineno] = tmp;
   2177 }
   2178 
   2179 static void zeroise_OCacheLine ( OCacheLine* line, Addr tag ) {
   2180    UWord i;
   2181    for (i = 0; i < OC_W32S_PER_LINE; i++) {
   2182       line->w32[i] = 0; /* NO ORIGIN */
   2183       line->descr[i] = 0; /* REALLY REALLY NO ORIGIN! */
   2184    }
   2185    line->tag = tag;
   2186 }
   2187 
   2188 //////////////////////////////////////////////////////////////
   2189 //// OCache backing store
   2190 
   2191 static OSet* ocacheL2 = NULL;
   2192 
   2193 static void* ocacheL2_malloc ( HChar* cc, SizeT szB ) {
   2194    return VG_(malloc)(cc, szB);
   2195 }
   2196 static void ocacheL2_free ( void* v ) {
   2197    VG_(free)( v );
   2198 }
   2199 
   2200 /* Stats: # nodes currently in tree */
   2201 static UWord stats__ocacheL2_n_nodes = 0;
   2202 
   2203 static void init_ocacheL2 ( void )
   2204 {
   2205    tl_assert(!ocacheL2);
   2206    tl_assert(sizeof(Word) == sizeof(Addr)); /* since OCacheLine.tag :: Addr */
   2207    tl_assert(0 == offsetof(OCacheLine,tag));
   2208    ocacheL2
   2209       = VG_(OSetGen_Create)( offsetof(OCacheLine,tag),
   2210                              NULL, /* fast cmp */
   2211                              ocacheL2_malloc, "mc.ioL2", ocacheL2_free);
   2212    tl_assert(ocacheL2);
   2213    stats__ocacheL2_n_nodes = 0;
   2214 }
   2215 
   2216 /* Find line with the given tag in the tree, or NULL if not found. */
   2217 static OCacheLine* ocacheL2_find_tag ( Addr tag )
   2218 {
   2219    OCacheLine* line;
   2220    tl_assert(is_valid_oc_tag(tag));
   2221    stats__ocacheL2_refs++;
   2222    line = VG_(OSetGen_Lookup)( ocacheL2, &tag );
   2223    return line;
   2224 }
   2225 
   2226 /* Delete the line with the given tag from the tree, if it is present, and
   2227    free up the associated memory. */
   2228 static void ocacheL2_del_tag ( Addr tag )
   2229 {
   2230    OCacheLine* line;
   2231    tl_assert(is_valid_oc_tag(tag));
   2232    stats__ocacheL2_refs++;
   2233    line = VG_(OSetGen_Remove)( ocacheL2, &tag );
   2234    if (line) {
   2235       VG_(OSetGen_FreeNode)(ocacheL2, line);
   2236       tl_assert(stats__ocacheL2_n_nodes > 0);
   2237       stats__ocacheL2_n_nodes--;
   2238    }
   2239 }
   2240 
   2241 /* Add a copy of the given line to the tree.  It must not already be
   2242    present. */
   2243 static void ocacheL2_add_line ( OCacheLine* line )
   2244 {
   2245    OCacheLine* copy;
   2246    tl_assert(is_valid_oc_tag(line->tag));
   2247    copy = VG_(OSetGen_AllocNode)( ocacheL2, sizeof(OCacheLine) );
   2248    tl_assert(copy);
   2249    *copy = *line;
   2250    stats__ocacheL2_refs++;
   2251    VG_(OSetGen_Insert)( ocacheL2, copy );
   2252    stats__ocacheL2_n_nodes++;
   2253    if (stats__ocacheL2_n_nodes > stats__ocacheL2_n_nodes_max)
   2254       stats__ocacheL2_n_nodes_max = stats__ocacheL2_n_nodes;
   2255 }
   2256 
   2257 ////
   2258 //////////////////////////////////////////////////////////////
   2259 
   2260 __attribute__((noinline))
   2261 static OCacheLine* find_OCacheLine_SLOW ( Addr a )
   2262 {
   2263    OCacheLine *victim, *inL2;
   2264    UChar c;
   2265    UWord line;
   2266    UWord setno   = (a >> OC_BITS_PER_LINE) & (OC_N_SETS - 1);
   2267    UWord tagmask = ~((1 << OC_BITS_PER_LINE) - 1);
   2268    UWord tag     = a & tagmask;
   2269    tl_assert(setno >= 0 && setno < OC_N_SETS);
   2270 
   2271    /* The caller already tried line == 0, so start at line 1. */
   2272    for (line = 1; line < OC_LINES_PER_SET; line++) {
   2273       if (ocacheL1->set[setno].line[line].tag == tag) {
   2274          if (line == 1) {
   2275             stats_ocacheL1_found_at_1++;
   2276          } else {
   2277             stats_ocacheL1_found_at_N++;
   2278          }
   2279          if (UNLIKELY(0 == (ocacheL1_event_ctr++
   2280                             & ((1<<OC_MOVE_FORWARDS_EVERY_BITS)-1)))) {
   2281             moveLineForwards( &ocacheL1->set[setno], line );
   2282             line--;
   2283          }
   2284          return &ocacheL1->set[setno].line[line];
   2285       }
   2286    }
   2287 
   2288    /* A miss.  Use the last slot.  Implicitly this means we're
   2289       ejecting the line in the last slot. */
   2290    stats_ocacheL1_misses++;
   2291    tl_assert(line == OC_LINES_PER_SET);
   2292    line--;
   2293    tl_assert(line > 0);
   2294 
   2295    /* First, move the to-be-ejected line to the L2 cache. */
   2296    victim = &ocacheL1->set[setno].line[line];
   2297    c = classify_OCacheLine(victim);
   2298    switch (c) {
   2299       case 'e':
   2300          /* the line is empty (has invalid tag); ignore it. */
   2301          break;
   2302       case 'z':
   2303          /* line contains zeroes.  We must ensure the backing store is
   2304             updated accordingly, either by copying the line there
   2305             verbatim, or by ensuring it isn't present there.  We
   2306             choose the latter on the basis that it reduces the size of
   2307             the backing store. */
   2308          ocacheL2_del_tag( victim->tag );
   2309          break;
   2310       case 'n':
   2311          /* line contains at least one real, useful origin.  Copy it
   2312             to the backing store. */
   2313          stats_ocacheL1_lossage++;
   2314          inL2 = ocacheL2_find_tag( victim->tag );
   2315          if (inL2) {
   2316             *inL2 = *victim;
   2317          } else {
   2318             ocacheL2_add_line( victim );
   2319          }
   2320          break;
   2321       default:
   2322          tl_assert(0);
   2323    }
   2324 
   2325    /* Now we must reload the L1 cache from the backing tree, if
   2326       possible. */
   2327    tl_assert(tag != victim->tag); /* stay sane */
   2328    inL2 = ocacheL2_find_tag( tag );
   2329    if (inL2) {
   2330       /* We're in luck.  It's in the L2. */
   2331       ocacheL1->set[setno].line[line] = *inL2;
   2332    } else {
   2333       /* Missed at both levels of the cache hierarchy.  We have to
   2334          declare it as full of zeroes (unknown origins). */
   2335       stats__ocacheL2_misses++;
   2336       zeroise_OCacheLine( &ocacheL1->set[setno].line[line], tag );
   2337    }
   2338 
   2339    /* Move it one forwards */
   2340    moveLineForwards( &ocacheL1->set[setno], line );
   2341    line--;
   2342 
   2343    return &ocacheL1->set[setno].line[line];
   2344 }
   2345 
   2346 static INLINE OCacheLine* find_OCacheLine ( Addr a )
   2347 {
   2348    UWord setno   = (a >> OC_BITS_PER_LINE) & (OC_N_SETS - 1);
   2349    UWord tagmask = ~((1 << OC_BITS_PER_LINE) - 1);
   2350    UWord tag     = a & tagmask;
   2351 
   2352    stats_ocacheL1_find++;
   2353 
   2354    if (OC_ENABLE_ASSERTIONS) {
   2355       tl_assert(setno >= 0 && setno < OC_N_SETS);
   2356       tl_assert(0 == (tag & (4 * OC_W32S_PER_LINE - 1)));
   2357    }
   2358 
   2359    if (LIKELY(ocacheL1->set[setno].line[0].tag == tag)) {
   2360       return &ocacheL1->set[setno].line[0];
   2361    }
   2362 
   2363    return find_OCacheLine_SLOW( a );
   2364 }
   2365 
   2366 static INLINE void set_aligned_word64_Origin_to_undef ( Addr a, UInt otag )
   2367 {
   2368    //// BEGIN inlined, specialised version of MC_(helperc_b_store8)
   2369    //// Set the origins for a+0 .. a+7
   2370    { OCacheLine* line;
   2371      UWord lineoff = oc_line_offset(a);
   2372      if (OC_ENABLE_ASSERTIONS) {
   2373         tl_assert(lineoff >= 0
   2374                   && lineoff < OC_W32S_PER_LINE -1/*'cos 8-aligned*/);
   2375      }
   2376      line = find_OCacheLine( a );
   2377      line->descr[lineoff+0] = 0xF;
   2378      line->descr[lineoff+1] = 0xF;
   2379      line->w32[lineoff+0]   = otag;
   2380      line->w32[lineoff+1]   = otag;
   2381    }
   2382    //// END inlined, specialised version of MC_(helperc_b_store8)
   2383 }
   2384 
   2385 
   2386 /*------------------------------------------------------------*/
   2387 /*--- Aligned fast case permission setters,                ---*/
   2388 /*--- for dealing with stacks                              ---*/
   2389 /*------------------------------------------------------------*/
   2390 
   2391 /*--------------------- 32-bit ---------------------*/
   2392 
   2393 /* Nb: by "aligned" here we mean 4-byte aligned */
   2394 
   2395 static INLINE void make_aligned_word32_undefined ( Addr a )
   2396 {
   2397    PROF_EVENT(300, "make_aligned_word32_undefined");
   2398 
   2399 #ifndef PERF_FAST_STACK2
   2400    make_mem_undefined(a, 4);
   2401 #else
   2402    {
   2403       UWord   sm_off;
   2404       SecMap* sm;
   2405 
   2406       if (UNLIKELY(a > MAX_PRIMARY_ADDRESS)) {
   2407          PROF_EVENT(301, "make_aligned_word32_undefined-slow1");
   2408          make_mem_undefined(a, 4);
   2409          return;
   2410       }
   2411 
   2412       sm                  = get_secmap_for_writing_low(a);
   2413       sm_off              = SM_OFF(a);
   2414       sm->vabits8[sm_off] = VA_BITS8_UNDEFINED;
   2415    }
   2416 #endif
   2417 }
   2418 
   2419 static INLINE
   2420 void make_aligned_word32_undefined_w_otag ( Addr a, UInt otag )
   2421 {
   2422    make_aligned_word32_undefined(a);
   2423    //// BEGIN inlined, specialised version of MC_(helperc_b_store4)
   2424    //// Set the origins for a+0 .. a+3
   2425    { OCacheLine* line;
   2426      UWord lineoff = oc_line_offset(a);
   2427      if (OC_ENABLE_ASSERTIONS) {
   2428         tl_assert(lineoff >= 0 && lineoff < OC_W32S_PER_LINE);
   2429      }
   2430      line = find_OCacheLine( a );
   2431      line->descr[lineoff] = 0xF;
   2432      line->w32[lineoff]   = otag;
   2433    }
   2434    //// END inlined, specialised version of MC_(helperc_b_store4)
   2435 }
   2436 
   2437 static INLINE
   2438 void make_aligned_word32_noaccess ( Addr a )
   2439 {
   2440    PROF_EVENT(310, "make_aligned_word32_noaccess");
   2441 
   2442 #ifndef PERF_FAST_STACK2
   2443    MC_(make_mem_noaccess)(a, 4);
   2444 #else
   2445    {
   2446       UWord   sm_off;
   2447       SecMap* sm;
   2448 
   2449       if (UNLIKELY(a > MAX_PRIMARY_ADDRESS)) {
   2450          PROF_EVENT(311, "make_aligned_word32_noaccess-slow1");
   2451          MC_(make_mem_noaccess)(a, 4);
   2452          return;
   2453       }
   2454 
   2455       sm                  = get_secmap_for_writing_low(a);
   2456       sm_off              = SM_OFF(a);
   2457       sm->vabits8[sm_off] = VA_BITS8_NOACCESS;
   2458 
   2459       //// BEGIN inlined, specialised version of MC_(helperc_b_store4)
   2460       //// Set the origins for a+0 .. a+3.
   2461       if (UNLIKELY( MC_(clo_mc_level) == 3 )) {
   2462          OCacheLine* line;
   2463          UWord lineoff = oc_line_offset(a);
   2464          if (OC_ENABLE_ASSERTIONS) {
   2465             tl_assert(lineoff >= 0 && lineoff < OC_W32S_PER_LINE);
   2466          }
   2467          line = find_OCacheLine( a );
   2468          line->descr[lineoff] = 0;
   2469       }
   2470       //// END inlined, specialised version of MC_(helperc_b_store4)
   2471    }
   2472 #endif
   2473 }
   2474 
   2475 /*--------------------- 64-bit ---------------------*/
   2476 
   2477 /* Nb: by "aligned" here we mean 8-byte aligned */
   2478 
   2479 static INLINE void make_aligned_word64_undefined ( Addr a )
   2480 {
   2481    PROF_EVENT(320, "make_aligned_word64_undefined");
   2482 
   2483 #ifndef PERF_FAST_STACK2
   2484    make_mem_undefined(a, 8);
   2485 #else
   2486    {
   2487       UWord   sm_off16;
   2488       SecMap* sm;
   2489 
   2490       if (UNLIKELY(a > MAX_PRIMARY_ADDRESS)) {
   2491          PROF_EVENT(321, "make_aligned_word64_undefined-slow1");
   2492          make_mem_undefined(a, 8);
   2493          return;
   2494       }
   2495 
   2496       sm       = get_secmap_for_writing_low(a);
   2497       sm_off16 = SM_OFF_16(a);
   2498       ((UShort*)(sm->vabits8))[sm_off16] = VA_BITS16_UNDEFINED;
   2499    }
   2500 #endif
   2501 }
   2502 
   2503 static INLINE
   2504 void make_aligned_word64_undefined_w_otag ( Addr a, UInt otag )
   2505 {
   2506    make_aligned_word64_undefined(a);
   2507    //// BEGIN inlined, specialised version of MC_(helperc_b_store8)
   2508    //// Set the origins for a+0 .. a+7
   2509    { OCacheLine* line;
   2510      UWord lineoff = oc_line_offset(a);
   2511      tl_assert(lineoff >= 0
   2512                && lineoff < OC_W32S_PER_LINE -1/*'cos 8-aligned*/);
   2513      line = find_OCacheLine( a );
   2514      line->descr[lineoff+0] = 0xF;
   2515      line->descr[lineoff+1] = 0xF;
   2516      line->w32[lineoff+0]   = otag;
   2517      line->w32[lineoff+1]   = otag;
   2518    }
   2519    //// END inlined, specialised version of MC_(helperc_b_store8)
   2520 }
   2521 
   2522 static INLINE
   2523 void make_aligned_word64_noaccess ( Addr a )
   2524 {
   2525    PROF_EVENT(330, "make_aligned_word64_noaccess");
   2526 
   2527 #ifndef PERF_FAST_STACK2
   2528    MC_(make_mem_noaccess)(a, 8);
   2529 #else
   2530    {
   2531       UWord   sm_off16;
   2532       SecMap* sm;
   2533 
   2534       if (UNLIKELY(a > MAX_PRIMARY_ADDRESS)) {
   2535          PROF_EVENT(331, "make_aligned_word64_noaccess-slow1");
   2536          MC_(make_mem_noaccess)(a, 8);
   2537          return;
   2538       }
   2539 
   2540       sm       = get_secmap_for_writing_low(a);
   2541       sm_off16 = SM_OFF_16(a);
   2542       ((UShort*)(sm->vabits8))[sm_off16] = VA_BITS16_NOACCESS;
   2543 
   2544       //// BEGIN inlined, specialised version of MC_(helperc_b_store8)
   2545       //// Clear the origins for a+0 .. a+7.
   2546       if (UNLIKELY( MC_(clo_mc_level) == 3 )) {
   2547          OCacheLine* line;
   2548          UWord lineoff = oc_line_offset(a);
   2549          tl_assert(lineoff >= 0
   2550                    && lineoff < OC_W32S_PER_LINE -1/*'cos 8-aligned*/);
   2551          line = find_OCacheLine( a );
   2552          line->descr[lineoff+0] = 0;
   2553          line->descr[lineoff+1] = 0;
   2554       }
   2555       //// END inlined, specialised version of MC_(helperc_b_store8)
   2556    }
   2557 #endif
   2558 }
   2559 
   2560 
   2561 /*------------------------------------------------------------*/
   2562 /*--- Stack pointer adjustment                             ---*/
   2563 /*------------------------------------------------------------*/
   2564 
   2565 #ifdef PERF_FAST_STACK
   2566 #  define MAYBE_USED
   2567 #else
   2568 #  define MAYBE_USED __attribute__((unused))
   2569 #endif
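        /* Contract implemented by the specialised handlers below, stated
           once here (all offsets are relative to
           -VG_STACK_REDZONE_SZB + new_SP):

           - mc_new_mem_stack_N[_w_ECU]: SP has just moved down by N, so the
             N bytes at offsets [0, N) have come into existence; mark them
             undefined, in the _w_ECU case tagged with the origin
             (ecu | MC_OKIND_STACK).

           - mc_die_mem_stack_N: SP has just moved up by N, so the N bytes
             at offsets [-N, 0) have been abandoned; mark them no-access.

           Each handler carves its range into aligned 64- and 32-bit chunks
           where the alignment of new_SP allows, and otherwise falls back to
           the generic byte-range routines. */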
   2570 
   2571 /*--------------- adjustment by 4 bytes ---------------*/
   2572 
   2573 MAYBE_USED
   2574 static void VG_REGPARM(2) mc_new_mem_stack_4_w_ECU(Addr new_SP, UInt ecu)
   2575 {
   2576    UInt otag = ecu | MC_OKIND_STACK;
   2577    PROF_EVENT(110, "new_mem_stack_4");
   2578    if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
   2579       make_aligned_word32_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP, otag );
   2580    } else {
   2581       MC_(make_mem_undefined_w_otag) ( -VG_STACK_REDZONE_SZB + new_SP, 4, otag );
   2582    }
   2583 }
   2584 
   2585 MAYBE_USED
   2586 static void VG_REGPARM(1) mc_new_mem_stack_4(Addr new_SP)
   2587 {
   2588    PROF_EVENT(110, "new_mem_stack_4");
   2589    if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
   2590       make_aligned_word32_undefined ( -VG_STACK_REDZONE_SZB + new_SP );
   2591    } else {
   2592       make_mem_undefined ( -VG_STACK_REDZONE_SZB + new_SP, 4 );
   2593    }
   2594 }
   2595 
   2596 MAYBE_USED
   2597 static void VG_REGPARM(1) mc_die_mem_stack_4(Addr new_SP)
   2598 {
   2599    PROF_EVENT(120, "die_mem_stack_4");
   2600    if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
   2601       make_aligned_word32_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-4 );
   2602    } else {
   2603       MC_(make_mem_noaccess) ( -VG_STACK_REDZONE_SZB + new_SP-4, 4 );
   2604    }
   2605 }
   2606 
   2607 /*--------------- adjustment by 8 bytes ---------------*/
   2608 
   2609 MAYBE_USED
   2610 static void VG_REGPARM(2) mc_new_mem_stack_8_w_ECU(Addr new_SP, UInt ecu)
   2611 {
   2612    UInt otag = ecu | MC_OKIND_STACK;
   2613    PROF_EVENT(111, "new_mem_stack_8");
   2614    if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
   2615       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP, otag );
   2616    } else if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
   2617       make_aligned_word32_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP  , otag );
   2618       make_aligned_word32_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+4, otag );
   2619    } else {
   2620       MC_(make_mem_undefined_w_otag) ( -VG_STACK_REDZONE_SZB + new_SP, 8, otag );
   2621    }
   2622 }
   2623 
   2624 MAYBE_USED
   2625 static void VG_REGPARM(1) mc_new_mem_stack_8(Addr new_SP)
   2626 {
   2627    PROF_EVENT(111, "new_mem_stack_8");
   2628    if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
   2629       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP );
   2630    } else if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
   2631       make_aligned_word32_undefined ( -VG_STACK_REDZONE_SZB + new_SP );
   2632       make_aligned_word32_undefined ( -VG_STACK_REDZONE_SZB + new_SP+4 );
   2633    } else {
   2634       make_mem_undefined ( -VG_STACK_REDZONE_SZB + new_SP, 8 );
   2635    }
   2636 }
   2637 
   2638 MAYBE_USED
   2639 static void VG_REGPARM(1) mc_die_mem_stack_8(Addr new_SP)
   2640 {
   2641    PROF_EVENT(121, "die_mem_stack_8");
   2642    if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
   2643       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-8 );
   2644    } else if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
   2645       make_aligned_word32_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-8 );
   2646       make_aligned_word32_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-4 );
   2647    } else {
   2648       MC_(make_mem_noaccess) ( -VG_STACK_REDZONE_SZB + new_SP-8, 8 );
   2649    }
   2650 }
   2651 
   2652 /*--------------- adjustment by 12 bytes ---------------*/
   2653 
   2654 MAYBE_USED
   2655 static void VG_REGPARM(2) mc_new_mem_stack_12_w_ECU(Addr new_SP, UInt ecu)
   2656 {
   2657    UInt otag = ecu | MC_OKIND_STACK;
   2658    PROF_EVENT(112, "new_mem_stack_12");
   2659    if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
   2660       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP  , otag );
   2661       make_aligned_word32_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+8, otag );
   2662    } else if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
   2663       /* From the previous test we don't have 8-alignment at offset +0,
   2664          hence we must have 8-alignment at offsets +4/-4.  Hence it is
   2665          safe to do 4 at +0 and then 8 at +4. */
   2666       make_aligned_word32_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP  , otag );
   2667       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+4, otag );
   2668    } else {
   2669       MC_(make_mem_undefined_w_otag) ( -VG_STACK_REDZONE_SZB + new_SP, 12, otag );
   2670    }
   2671 }
   2672 
   2673 MAYBE_USED
   2674 static void VG_REGPARM(1) mc_new_mem_stack_12(Addr new_SP)
   2675 {
   2676    PROF_EVENT(112, "new_mem_stack_12");
   2677    if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
   2678       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP );
   2679       make_aligned_word32_undefined ( -VG_STACK_REDZONE_SZB + new_SP+8 );
   2680    } else if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
   2681       /* From the previous test we don't have 8-alignment at offset +0,
   2682          hence we must have 8-alignment at offsets +4/-4.  Hence it is
   2683          safe to do 4 at +0 and then 8 at +4. */
   2684       make_aligned_word32_undefined ( -VG_STACK_REDZONE_SZB + new_SP );
   2685       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+4 );
   2686    } else {
   2687       make_mem_undefined ( -VG_STACK_REDZONE_SZB + new_SP, 12 );
   2688    }
   2689 }
   2690 
   2691 MAYBE_USED
   2692 static void VG_REGPARM(1) mc_die_mem_stack_12(Addr new_SP)
   2693 {
   2694    PROF_EVENT(122, "die_mem_stack_12");
   2695    /* Note the -12 in the test */
   2696    if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP-12 )) {
   2697       /* We have 8-alignment at -12, hence ok to do 8 at -12 and 4 at
   2698          -4. */
   2699       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-12 );
   2700       make_aligned_word32_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-4  );
   2701    } else if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
   2702       /* We have 4-alignment at +0, but we don't have 8-alignment at
   2703          -12.  So we must have 8-alignment at -8.  Hence do 4 at -12
   2704          and then 8 at -8. */
   2705       make_aligned_word32_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-12 );
   2706       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-8  );
   2707    } else {
   2708       MC_(make_mem_noaccess) ( -VG_STACK_REDZONE_SZB + new_SP-12, 12 );
   2709    }
   2710 }
   2711 
   2712 /*--------------- adjustment by 16 bytes ---------------*/
   2713 
   2714 MAYBE_USED
   2715 static void VG_REGPARM(2) mc_new_mem_stack_16_w_ECU(Addr new_SP, UInt ecu)
   2716 {
   2717    UInt otag = ecu | MC_OKIND_STACK;
   2718    PROF_EVENT(113, "new_mem_stack_16");
   2719    if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
   2720       /* Have 8-alignment at +0, hence do 8 at +0 and 8 at +8. */
   2721       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP  , otag );
   2722       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+8, otag );
   2723    } else if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
   2724       /* Have 4 alignment at +0 but not 8; hence 8 must be at +4.
   2725          Hence do 4 at +0, 8 at +4, 4 at +12. */
   2726       make_aligned_word32_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP   , otag );
   2727       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+4 , otag );
   2728       make_aligned_word32_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+12, otag );
   2729    } else {
   2730       MC_(make_mem_undefined_w_otag) ( -VG_STACK_REDZONE_SZB + new_SP, 16, otag );
   2731    }
   2732 }
   2733 
   2734 MAYBE_USED
   2735 static void VG_REGPARM(1) mc_new_mem_stack_16(Addr new_SP)
   2736 {
   2737    PROF_EVENT(113, "new_mem_stack_16");
   2738    if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
   2739       /* Have 8-alignment at +0, hence do 8 at +0 and 8 at +8. */
   2740       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP );
   2741       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+8 );
   2742    } else if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
   2743       /* Have 4 alignment at +0 but not 8; hence 8 must be at +4.
   2744          Hence do 4 at +0, 8 at +4, 4 at +12. */
   2745       make_aligned_word32_undefined ( -VG_STACK_REDZONE_SZB + new_SP );
   2746       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+4  );
   2747       make_aligned_word32_undefined ( -VG_STACK_REDZONE_SZB + new_SP+12 );
   2748    } else {
   2749       make_mem_undefined ( -VG_STACK_REDZONE_SZB + new_SP, 16 );
   2750    }
   2751 }
   2752 
   2753 MAYBE_USED
   2754 static void VG_REGPARM(1) mc_die_mem_stack_16(Addr new_SP)
   2755 {
   2756    PROF_EVENT(123, "die_mem_stack_16");
   2757    if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
   2758       /* Have 8-alignment at +0, hence do 8 at -16 and 8 at -8. */
   2759       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-16 );
   2760       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-8  );
   2761    } else if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
   2762       /* 8 alignment must be at -12.  Do 4 at -16, 8 at -12, 4 at -4. */
   2763       make_aligned_word32_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-16 );
   2764       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-12 );
   2765       make_aligned_word32_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-4  );
   2766    } else {
   2767       MC_(make_mem_noaccess) ( -VG_STACK_REDZONE_SZB + new_SP-16, 16 );
   2768    }
   2769 }
   2770 
   2771 /*--------------- adjustment by 32 bytes ---------------*/
   2772 
   2773 MAYBE_USED
   2774 static void VG_REGPARM(2) mc_new_mem_stack_32_w_ECU(Addr new_SP, UInt ecu)
   2775 {
   2776    UInt otag = ecu | MC_OKIND_STACK;
   2777    PROF_EVENT(114, "new_mem_stack_32");
   2778    if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
   2779       /* Straightforward */
   2780       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP   , otag );
   2781       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+8 , otag );
   2782       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+16, otag );
   2783       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+24, otag );
   2784    } else if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
   2785       /* 8 alignment must be at +4.  Hence do 8 at +4,+12,+20 and 4 at
   2786          +0,+28. */
   2787       make_aligned_word32_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP   , otag );
   2788       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+4 , otag );
   2789       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+12, otag );
   2790       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+20, otag );
   2791       make_aligned_word32_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+28, otag );
   2792    } else {
   2793       MC_(make_mem_undefined_w_otag) ( -VG_STACK_REDZONE_SZB + new_SP, 32, otag );
   2794    }
   2795 }
   2796 
   2797 MAYBE_USED
   2798 static void VG_REGPARM(1) mc_new_mem_stack_32(Addr new_SP)
   2799 {
   2800    PROF_EVENT(114, "new_mem_stack_32");
   2801    if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
   2802       /* Straightforward */
   2803       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP );
   2804       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+8 );
   2805       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+16 );
   2806       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+24 );
   2807    } else if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
   2808       /* 8 alignment must be at +4.  Hence do 8 at +4,+12,+20 and 4 at
   2809          +0,+28. */
   2810       make_aligned_word32_undefined ( -VG_STACK_REDZONE_SZB + new_SP );
   2811       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+4 );
   2812       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+12 );
   2813       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+20 );
   2814       make_aligned_word32_undefined ( -VG_STACK_REDZONE_SZB + new_SP+28 );
   2815    } else {
   2816       make_mem_undefined ( -VG_STACK_REDZONE_SZB + new_SP, 32 );
   2817    }
   2818 }
   2819 
   2820 MAYBE_USED
   2821 static void VG_REGPARM(1) mc_die_mem_stack_32(Addr new_SP)
   2822 {
   2823    PROF_EVENT(124, "die_mem_stack_32");
   2824    if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
   2825       /* Straightforward */
   2826       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-32 );
   2827       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-24 );
   2828       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-16 );
   2829       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP- 8 );
   2830    } else if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
   2831       /* 8 alignment must be at -4 etc.  Hence do 8 at -12,-20,-28 and
   2832          4 at -32,-4. */
   2833       make_aligned_word32_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-32 );
   2834       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-28 );
   2835       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-20 );
   2836       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-12 );
   2837       make_aligned_word32_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-4  );
   2838    } else {
   2839       MC_(make_mem_noaccess) ( -VG_STACK_REDZONE_SZB + new_SP-32, 32 );
   2840    }
   2841 }
   2842 
   2843 /*--------------- adjustment by 112 bytes ---------------*/
   2844 
   2845 MAYBE_USED
   2846 static void VG_REGPARM(2) mc_new_mem_stack_112_w_ECU(Addr new_SP, UInt ecu)
   2847 {
   2848    UInt otag = ecu | MC_OKIND_STACK;
   2849    PROF_EVENT(115, "new_mem_stack_112");
   2850    if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
   2851       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP   , otag );
   2852       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+8 , otag );
   2853       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+16, otag );
   2854       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+24, otag );
   2855       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+32, otag );
   2856       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+40, otag );
   2857       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+48, otag );
   2858       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+56, otag );
   2859       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+64, otag );
   2860       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+72, otag );
   2861       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+80, otag );
   2862       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+88, otag );
   2863       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+96, otag );
   2864       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+104, otag );
   2865    } else {
   2866       MC_(make_mem_undefined_w_otag) ( -VG_STACK_REDZONE_SZB + new_SP, 112, otag );
   2867    }
   2868 }
   2869 
   2870 MAYBE_USED
   2871 static void VG_REGPARM(1) mc_new_mem_stack_112(Addr new_SP)
   2872 {
   2873    PROF_EVENT(115, "new_mem_stack_112");
   2874    if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
   2875       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP );
   2876       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+8 );
   2877       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+16 );
   2878       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+24 );
   2879       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+32 );
   2880       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+40 );
   2881       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+48 );
   2882       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+56 );
   2883       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+64 );
   2884       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+72 );
   2885       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+80 );
   2886       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+88 );
   2887       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+96 );
   2888       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+104 );
   2889    } else {
   2890       make_mem_undefined ( -VG_STACK_REDZONE_SZB + new_SP, 112 );
   2891    }
   2892 }
   2893 
   2894 MAYBE_USED
   2895 static void VG_REGPARM(1) mc_die_mem_stack_112(Addr new_SP)
   2896 {
   2897    PROF_EVENT(125, "die_mem_stack_112");
   2898    if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
   2899       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-112);
   2900       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-104);
   2901       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-96 );
   2902       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-88 );
   2903       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-80 );
   2904       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-72 );
   2905       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-64 );
   2906       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-56 );
   2907       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-48 );
   2908       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-40 );
   2909       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-32 );
   2910       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-24 );
   2911       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-16 );
   2912       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP- 8 );
   2913    } else {
   2914       MC_(make_mem_noaccess) ( -VG_STACK_REDZONE_SZB + new_SP-112, 112 );
   2915    }
   2916 }
   2917 
   2918 /*--------------- adjustment by 128 bytes ---------------*/
   2919 
   2920 MAYBE_USED
   2921 static void VG_REGPARM(2) mc_new_mem_stack_128_w_ECU(Addr new_SP, UInt ecu)
   2922 {
   2923    UInt otag = ecu | MC_OKIND_STACK;
   2924    PROF_EVENT(116, "new_mem_stack_128");
   2925    if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
   2926       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP   , otag );
   2927       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+8 , otag );
   2928       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+16, otag );
   2929       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+24, otag );
   2930       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+32, otag );
   2931       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+40, otag );
   2932       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+48, otag );
   2933       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+56, otag );
   2934       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+64, otag );
   2935       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+72, otag );
   2936       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+80, otag );
   2937       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+88, otag );
   2938       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+96, otag );
   2939       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+104, otag );
   2940       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+112, otag );
   2941       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+120, otag );
   2942    } else {
   2943       MC_(make_mem_undefined_w_otag) ( -VG_STACK_REDZONE_SZB + new_SP, 128, otag );
   2944    }
   2945 }
   2946 
   2947 MAYBE_USED
   2948 static void VG_REGPARM(1) mc_new_mem_stack_128(Addr new_SP)
   2949 {
   2950    PROF_EVENT(116, "new_mem_stack_128");
   2951    if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
   2952       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP );
   2953       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+8 );
   2954       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+16 );
   2955       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+24 );
   2956       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+32 );
   2957       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+40 );
   2958       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+48 );
   2959       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+56 );
   2960       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+64 );
   2961       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+72 );
   2962       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+80 );
   2963       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+88 );
   2964       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+96 );
   2965       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+104 );
   2966       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+112 );
   2967       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+120 );
   2968    } else {
   2969       make_mem_undefined ( -VG_STACK_REDZONE_SZB + new_SP, 128 );
   2970    }
   2971 }
   2972 
   2973 MAYBE_USED
   2974 static void VG_REGPARM(1) mc_die_mem_stack_128(Addr new_SP)
   2975 {
   2976    PROF_EVENT(126, "die_mem_stack_128");
   2977    if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
   2978       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-128);
   2979       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-120);
   2980       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-112);
   2981       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-104);
   2982       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-96 );
   2983       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-88 );
   2984       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-80 );
   2985       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-72 );
   2986       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-64 );
   2987       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-56 );
   2988       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-48 );
   2989       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-40 );
   2990       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-32 );
   2991       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-24 );
   2992       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-16 );
   2993       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP- 8 );
   2994    } else {
   2995       MC_(make_mem_noaccess) ( -VG_STACK_REDZONE_SZB + new_SP-128, 128 );
   2996    }
   2997 }
   2998 
   2999 /*--------------- adjustment by 144 bytes ---------------*/
   3000 
   3001 MAYBE_USED
   3002 static void VG_REGPARM(2) mc_new_mem_stack_144_w_ECU(Addr new_SP, UInt ecu)
   3003 {
   3004    UInt otag = ecu | MC_OKIND_STACK;
   3005    PROF_EVENT(117, "new_mem_stack_144");
   3006    if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
   3007       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP,     otag );
   3008       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+8,   otag );
   3009       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+16,  otag );
   3010       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+24,  otag );
   3011       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+32,  otag );
   3012       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+40,  otag );
   3013       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+48,  otag );
   3014       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+56,  otag );
   3015       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+64,  otag );
   3016       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+72,  otag );
   3017       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+80,  otag );
   3018       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+88,  otag );
   3019       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+96,  otag );
   3020       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+104, otag );
   3021       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+112, otag );
   3022       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+120, otag );
   3023       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+128, otag );
   3024       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+136, otag );
   3025    } else {
   3026       MC_(make_mem_undefined_w_otag) ( -VG_STACK_REDZONE_SZB + new_SP, 144, otag );
   3027    }
   3028 }
   3029 
   3030 MAYBE_USED
   3031 static void VG_REGPARM(1) mc_new_mem_stack_144(Addr new_SP)
   3032 {
   3033    PROF_EVENT(117, "new_mem_stack_144");
   3034    if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
   3035       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP );
   3036       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+8 );
   3037       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+16 );
   3038       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+24 );
   3039       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+32 );
   3040       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+40 );
   3041       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+48 );
   3042       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+56 );
   3043       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+64 );
   3044       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+72 );
   3045       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+80 );
   3046       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+88 );
   3047       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+96 );
   3048       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+104 );
   3049       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+112 );
   3050       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+120 );
   3051       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+128 );
   3052       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+136 );
   3053    } else {
   3054       make_mem_undefined ( -VG_STACK_REDZONE_SZB + new_SP, 144 );
   3055    }
   3056 }
   3057 
   3058 MAYBE_USED
   3059 static void VG_REGPARM(1) mc_die_mem_stack_144(Addr new_SP)
   3060 {
   3061    PROF_EVENT(127, "die_mem_stack_144");
   3062    if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
   3063       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-144);
   3064       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-136);
   3065       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-128);
   3066       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-120);
   3067       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-112);
   3068       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-104);
   3069       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-96 );
   3070       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-88 );
   3071       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-80 );
   3072       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-72 );
   3073       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-64 );
   3074       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-56 );
   3075       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-48 );
   3076       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-40 );
   3077       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-32 );
   3078       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-24 );
   3079       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-16 );
   3080       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP- 8 );
   3081    } else {
   3082       MC_(make_mem_noaccess) ( -VG_STACK_REDZONE_SZB + new_SP-144, 144 );
   3083    }
   3084 }
   3085 
   3086 /*--------------- adjustment by 160 bytes ---------------*/
   3087 
   3088 MAYBE_USED
   3089 static void VG_REGPARM(2) mc_new_mem_stack_160_w_ECU(Addr new_SP, UInt ecu)
   3090 {
   3091    UInt otag = ecu | MC_OKIND_STACK;
   3092    PROF_EVENT(118, "new_mem_stack_160");
   3093    if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
   3094       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP,     otag );
   3095       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+8,   otag );
   3096       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+16,  otag );
   3097       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+24,  otag );
   3098       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+32,  otag );
   3099       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+40,  otag );
   3100       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+48,  otag );
   3101       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+56,  otag );
   3102       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+64,  otag );
   3103       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+72,  otag );
   3104       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+80,  otag );
   3105       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+88,  otag );
   3106       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+96,  otag );
   3107       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+104, otag );
   3108       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+112, otag );
   3109       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+120, otag );
   3110       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+128, otag );
   3111       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+136, otag );
   3112       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+144, otag );
   3113       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+152, otag );
   3114    } else {
   3115       MC_(make_mem_undefined_w_otag) ( -VG_STACK_REDZONE_SZB + new_SP, 160, otag );
   3116    }
   3117 }
   3118 
   3119 MAYBE_USED
   3120 static void VG_REGPARM(1) mc_new_mem_stack_160(Addr new_SP)
   3121 {
   3122    PROF_EVENT(118, "new_mem_stack_160");
   3123    if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
   3124       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP );
   3125       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+8 );
   3126       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+16 );
   3127       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+24 );
   3128       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+32 );
   3129       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+40 );
   3130       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+48 );
   3131       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+56 );
   3132       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+64 );
   3133       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+72 );
   3134       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+80 );
   3135       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+88 );
   3136       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+96 );
   3137       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+104 );
   3138       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+112 );
   3139       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+120 );
   3140       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+128 );
   3141       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+136 );
   3142       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+144 );
   3143       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+152 );
   3144    } else {
   3145       make_mem_undefined ( -VG_STACK_REDZONE_SZB + new_SP, 160 );
   3146    }
   3147 }
   3148 
   3149 MAYBE_USED
   3150 static void VG_REGPARM(1) mc_die_mem_stack_160(Addr new_SP)
   3151 {
   3152    PROF_EVENT(128, "die_mem_stack_160");
   3153    if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
   3154       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-160);
   3155       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-152);
   3156       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-144);
   3157       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-136);
   3158       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-128);
   3159       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-120);
   3160       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-112);
   3161       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-104);
   3162       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-96 );
   3163       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-88 );
   3164       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-80 );
   3165       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-72 );
   3166       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-64 );
   3167       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-56 );
   3168       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-48 );
   3169       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-40 );
   3170       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-32 );
   3171       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-24 );
   3172       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-16 );
   3173       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP- 8 );
   3174    } else {
   3175       MC_(make_mem_noaccess) ( -VG_STACK_REDZONE_SZB + new_SP-160, 160 );
   3176    }
   3177 }
   3178 
   3179 /*--------------- adjustment by N bytes ---------------*/
   3180 
   3181 static void mc_new_mem_stack_w_ECU ( Addr a, SizeT len, UInt ecu )
   3182 {
   3183    UInt otag = ecu | MC_OKIND_STACK;
   3184    PROF_EVENT(115, "new_mem_stack_w_otag");
   3185    MC_(make_mem_undefined_w_otag) ( -VG_STACK_REDZONE_SZB + a, len, otag );
   3186 }
   3187 
   3188 static void mc_new_mem_stack ( Addr a, SizeT len )
   3189 {
   3190    PROF_EVENT(115, "new_mem_stack");
   3191    make_mem_undefined ( -VG_STACK_REDZONE_SZB + a, len );
   3192 }
   3193 
   3194 static void mc_die_mem_stack ( Addr a, SizeT len )
   3195 {
   3196    PROF_EVENT(125, "die_mem_stack");
   3197    MC_(make_mem_noaccess) ( -VG_STACK_REDZONE_SZB + a, len );
   3198 }
   3199 
   3200 
   3201 /* The AMD64 ABI says:
   3202 
   3203    "The 128-byte area beyond the location pointed to by %rsp is considered
   3204     to be reserved and shall not be modified by signal or interrupt
   3205     handlers.  Therefore, functions may use this area for temporary data
   3206     that is not needed across function calls.  In particular, leaf functions
   3207     may use this area for their entire stack frame, rather than adjusting
   3208     the stack pointer in the prologue and epilogue.  This area is known as
   3209     red zone [sic]."
   3210 
   3211    So after any call or return we need to mark this redzone as containing
   3212    undefined values.
   3213 
   3214    Consider this:  we're in function f.  f calls g.  g moves rsp down
   3215    modestly (say 16 bytes) and writes stuff all over the red zone, making it
   3216    defined.  g returns.  f is buggy and reads from parts of the red zone
   3217    that it didn't write on.  But because g filled that area in, f is going
   3218    to be picking up defined V bits and so any errors from reading bits of
   3219    the red zone it didn't write will be missed.  The only solution I could
   3220    think of was to make the red zone undefined when g returns to f.
   3221 
   3222    This is in accordance with the ABI, which makes it clear the redzone
   3223    is volatile across function calls.
   3224 
   3225    The problem occurs the other way round too: f could fill the RZ up
   3226    with defined values and g could mistakenly read them.  So the RZ
   3227    also needs to be nuked on function calls.
   3228 */
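        /* Within MemCheck the re-poisoning is done by
           MC_(helperc_MAKE_STACK_UNINIT), defined further down: the
           instrumentation pass is expected to insert calls to it at the
           points where the ABI says the red zone's contents become invalid,
           with len == 128 for the amd64 red zone (and len == 288 for the
           ELF ppc64 case it also handles). */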
   3229 
   3230 
   3231 /* Here's a simple cache to hold nia -> ECU mappings.  It could be
   3232    improved so as to have a lower miss rate. */
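        /* Organisation, as implemented below: N_NIA_TO_ECU_CACHE sets, each
           holding two (nia, ecu) pairs.  Lookup indexes by
           nia % N_NIA_TO_ECU_CACHE; a hit in the second slot swaps it into
           the first (move-to-front), and a miss builds a depth-1 ExeContext
           for nia, installs it in the first slot and demotes the old first
           slot to the second. */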
   3233 
   3234 static UWord stats__nia_cache_queries = 0;
   3235 static UWord stats__nia_cache_misses  = 0;
   3236 
   3237 typedef
   3238    struct { UWord nia0; UWord ecu0;   /* nia0 maps to ecu0 */
   3239             UWord nia1; UWord ecu1; } /* nia1 maps to ecu1 */
   3240    WCacheEnt;
   3241 
   3242 #define N_NIA_TO_ECU_CACHE 511
   3243 
   3244 static WCacheEnt nia_to_ecu_cache[N_NIA_TO_ECU_CACHE];
   3245 
   3246 static void init_nia_to_ecu_cache ( void )
   3247 {
   3248    UWord       i;
   3249    Addr        zero_addr = 0;
   3250    ExeContext* zero_ec;
   3251    UInt        zero_ecu;
   3252    /* Fill every slot with an entry mapping address zero to its
   3253       ECU, so the cache starts out entirely filled with valid
   3254       data. */
   3255    zero_ec = VG_(make_depth_1_ExeContext_from_Addr)(zero_addr);
   3256    tl_assert(zero_ec);
   3257    zero_ecu = VG_(get_ECU_from_ExeContext)(zero_ec);
   3258    tl_assert(VG_(is_plausible_ECU)(zero_ecu));
   3259    for (i = 0; i < N_NIA_TO_ECU_CACHE; i++) {
   3260       nia_to_ecu_cache[i].nia0 = zero_addr;
   3261       nia_to_ecu_cache[i].ecu0 = zero_ecu;
   3262       nia_to_ecu_cache[i].nia1 = zero_addr;
   3263       nia_to_ecu_cache[i].ecu1 = zero_ecu;
   3264    }
   3265 }
   3266 
   3267 static inline UInt convert_nia_to_ecu ( Addr nia )
   3268 {
   3269    UWord       i;
   3270    UInt        ecu;
   3271    ExeContext* ec;
   3272 
   3273    tl_assert( sizeof(nia_to_ecu_cache[0].nia1) == sizeof(nia) );
   3274 
   3275    stats__nia_cache_queries++;
   3276    i = nia % N_NIA_TO_ECU_CACHE;
   3277    tl_assert(i >= 0 && i < N_NIA_TO_ECU_CACHE);
   3278 
   3279    if (LIKELY( nia_to_ecu_cache[i].nia0 == nia ))
   3280       return nia_to_ecu_cache[i].ecu0;
   3281 
   3282    if (LIKELY( nia_to_ecu_cache[i].nia1 == nia )) {
   3283 #     define SWAP(_w1,_w2) { UWord _t = _w1; _w1 = _w2; _w2 = _t; }
   3284       SWAP( nia_to_ecu_cache[i].nia0, nia_to_ecu_cache[i].nia1 );
   3285       SWAP( nia_to_ecu_cache[i].ecu0, nia_to_ecu_cache[i].ecu1 );
   3286 #     undef SWAP
   3287       return nia_to_ecu_cache[i].ecu0;
   3288    }
   3289 
   3290    stats__nia_cache_misses++;
   3291    ec = VG_(make_depth_1_ExeContext_from_Addr)(nia);
   3292    tl_assert(ec);
   3293    ecu = VG_(get_ECU_from_ExeContext)(ec);
   3294    tl_assert(VG_(is_plausible_ECU)(ecu));
   3295 
   3296    nia_to_ecu_cache[i].nia1 = nia_to_ecu_cache[i].nia0;
   3297    nia_to_ecu_cache[i].ecu1 = nia_to_ecu_cache[i].ecu0;
   3298 
   3299    nia_to_ecu_cache[i].nia0 = nia;
   3300    nia_to_ecu_cache[i].ecu0 = (UWord)ecu;
   3301    return ecu;
   3302 }
   3303 
   3304 
   3305 /* Note that this serves both the origin-tracking and
   3306    no-origin-tracking modes.  We assume that calls to it are
   3307    sufficiently infrequent that it isn't worth specialising for the
   3308    with/without origin-tracking cases. */
   3309 void MC_(helperc_MAKE_STACK_UNINIT) ( Addr base, UWord len, Addr nia )
   3310 {
   3311    UInt otag;
   3312    tl_assert(sizeof(UWord) == sizeof(SizeT));
   3313    if (0)
   3314       VG_(printf)("helperc_MAKE_STACK_UNINIT (%#lx,%lu,nia=%#lx)\n",
   3315                   base, len, nia );
   3316 
   3317    if (UNLIKELY( MC_(clo_mc_level) == 3 )) {
   3318       UInt ecu = convert_nia_to_ecu ( nia );
   3319       tl_assert(VG_(is_plausible_ECU)(ecu));
   3320       otag = ecu | MC_OKIND_STACK;
   3321    } else {
   3322       tl_assert(nia == 0);
   3323       otag = 0;
   3324    }
   3325 
   3326 #  if 0
   3327    /* Really slow version */
   3328    MC_(make_mem_undefined_w_otag)(base, len, otag);
   3329 #  endif
   3330 
   3331 #  if 0
   3332    /* Slow(ish) version, which is fairly easily seen to be correct.
   3333    */
   3334    if (LIKELY( VG_IS_8_ALIGNED(base) && len==128 )) {
   3335       make_aligned_word64_undefined_w_otag(base +   0, otag);
   3336       make_aligned_word64_undefined_w_otag(base +   8, otag);
   3337       make_aligned_word64_undefined_w_otag(base +  16, otag);
   3338       make_aligned_word64_undefined_w_otag(base +  24, otag);
   3339 
   3340       make_aligned_word64_undefined_w_otag(base +  32, otag);
   3341       make_aligned_word64_undefined_w_otag(base +  40, otag);
   3342       make_aligned_word64_undefined_w_otag(base +  48, otag);
   3343       make_aligned_word64_undefined_w_otag(base +  56, otag);
   3344 
   3345       make_aligned_word64_undefined_w_otag(base +  64, otag);
   3346       make_aligned_word64_undefined_w_otag(base +  72, otag);
   3347       make_aligned_word64_undefined_w_otag(base +  80, otag);
   3348       make_aligned_word64_undefined_w_otag(base +  88, otag);
   3349 
   3350       make_aligned_word64_undefined_w_otag(base +  96, otag);
   3351       make_aligned_word64_undefined_w_otag(base + 104, otag);
   3352       make_aligned_word64_undefined_w_otag(base + 112, otag);
   3353       make_aligned_word64_undefined_w_otag(base + 120, otag);
   3354    } else {
   3355       MC_(make_mem_undefined_w_otag)(base, len, otag);
   3356    }
   3357 #  endif
   3358 
   3359    /* Idea is: go fast when
   3360          * 8-aligned and length is 128
   3361          * the sm is available in the main primary map
   3362          * the address range falls entirely within a single secondary map
   3363       If all those conditions hold, just update the V+A bits by writing
   3364       directly into the vabits array.  (If the sm was distinguished, this
   3365       will make a copy and then write to it.)
   3366    */
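           /* (Each vabits8[] byte holds the 2-bit V+A codes for four bytes
              of address space, so one 16-bit store of VA_BITS16_UNDEFINED
              covers 8 client bytes; hence the 16 stores for the 128-byte
              case here, and the 36 for the 288-byte ppc64 case below.) */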
   3367 
   3368    if (LIKELY( len == 128 && VG_IS_8_ALIGNED(base) )) {
   3369       /* Now we know the address range is suitably sized and aligned. */
   3370       UWord a_lo = (UWord)(base);
   3371       UWord a_hi = (UWord)(base + 128 - 1);
   3372       tl_assert(a_lo < a_hi);             // paranoia: detect overflow
   3373       if (a_hi <= MAX_PRIMARY_ADDRESS) {
   3374          // Now we know the entire range is within the main primary map.
   3375          SecMap* sm    = get_secmap_for_writing_low(a_lo);
   3376          SecMap* sm_hi = get_secmap_for_writing_low(a_hi);
   3377          /* Now we know that the entire address range falls within a
   3378             single secondary map, and that that secondary 'lives' in
   3379             the main primary map. */
   3380          if (LIKELY(sm == sm_hi)) {
   3381             // Finally, we know that the range is entirely within one secmap.
   3382             UWord   v_off = SM_OFF(a_lo);
   3383             UShort* p     = (UShort*)(&sm->vabits8[v_off]);
   3384             p[ 0] = VA_BITS16_UNDEFINED;
   3385             p[ 1] = VA_BITS16_UNDEFINED;
   3386             p[ 2] = VA_BITS16_UNDEFINED;
   3387             p[ 3] = VA_BITS16_UNDEFINED;
   3388             p[ 4] = VA_BITS16_UNDEFINED;
   3389             p[ 5] = VA_BITS16_UNDEFINED;
   3390             p[ 6] = VA_BITS16_UNDEFINED;
   3391             p[ 7] = VA_BITS16_UNDEFINED;
   3392             p[ 8] = VA_BITS16_UNDEFINED;
   3393             p[ 9] = VA_BITS16_UNDEFINED;
   3394             p[10] = VA_BITS16_UNDEFINED;
   3395             p[11] = VA_BITS16_UNDEFINED;
   3396             p[12] = VA_BITS16_UNDEFINED;
   3397             p[13] = VA_BITS16_UNDEFINED;
   3398             p[14] = VA_BITS16_UNDEFINED;
   3399             p[15] = VA_BITS16_UNDEFINED;
   3400             if (UNLIKELY( MC_(clo_mc_level) == 3 )) {
   3401                set_aligned_word64_Origin_to_undef( base + 8 * 0, otag );
   3402                set_aligned_word64_Origin_to_undef( base + 8 * 1, otag );
   3403                set_aligned_word64_Origin_to_undef( base + 8 * 2, otag );
   3404                set_aligned_word64_Origin_to_undef( base + 8 * 3, otag );
   3405                set_aligned_word64_Origin_to_undef( base + 8 * 4, otag );
   3406                set_aligned_word64_Origin_to_undef( base + 8 * 5, otag );
   3407                set_aligned_word64_Origin_to_undef( base + 8 * 6, otag );
   3408                set_aligned_word64_Origin_to_undef( base + 8 * 7, otag );
   3409                set_aligned_word64_Origin_to_undef( base + 8 * 8, otag );
   3410                set_aligned_word64_Origin_to_undef( base + 8 * 9, otag );
   3411                set_aligned_word64_Origin_to_undef( base + 8 * 10, otag );
   3412                set_aligned_word64_Origin_to_undef( base + 8 * 11, otag );
   3413                set_aligned_word64_Origin_to_undef( base + 8 * 12, otag );
   3414                set_aligned_word64_Origin_to_undef( base + 8 * 13, otag );
   3415                set_aligned_word64_Origin_to_undef( base + 8 * 14, otag );
   3416                set_aligned_word64_Origin_to_undef( base + 8 * 15, otag );
   3417             }
   3418             return;
   3419          }
   3420       }
   3421    }
   3422 
   3423    /* 288 bytes (36 ULongs) is the magic value for ELF ppc64. */
   3424    if (LIKELY( len == 288 && VG_IS_8_ALIGNED(base) )) {
   3425       /* Now we know the address range is suitably sized and aligned. */
   3426       UWord a_lo = (UWord)(base);
   3427       UWord a_hi = (UWord)(base + 288 - 1);
   3428       tl_assert(a_lo < a_hi);             // paranoia: detect overflow
   3429       if (a_hi <= MAX_PRIMARY_ADDRESS) {
   3430          // Now we know the entire range is within the main primary map.
   3431          SecMap* sm    = get_secmap_for_writing_low(a_lo);
   3432          SecMap* sm_hi = get_secmap_for_writing_low(a_hi);
   3433          /* Now we know that the entire address range falls within a
   3434             single secondary map, and that that secondary 'lives' in
   3435             the main primary map. */
   3436          if (LIKELY(sm == sm_hi)) {
   3437             // Finally, we know that the range is entirely within one secmap.
   3438             UWord   v_off = SM_OFF(a_lo);
   3439             UShort* p     = (UShort*)(&sm->vabits8[v_off]);
   3440             p[ 0] = VA_BITS16_UNDEFINED;
   3441             p[ 1] = VA_BITS16_UNDEFINED;
   3442             p[ 2] = VA_BITS16_UNDEFINED;
   3443             p[ 3] = VA_BITS16_UNDEFINED;
   3444             p[ 4] = VA_BITS16_UNDEFINED;
   3445             p[ 5] = VA_BITS16_UNDEFINED;
   3446             p[ 6] = VA_BITS16_UNDEFINED;
   3447             p[ 7] = VA_BITS16_UNDEFINED;
   3448             p[ 8] = VA_BITS16_UNDEFINED;
   3449             p[ 9] = VA_BITS16_UNDEFINED;
   3450             p[10] = VA_BITS16_UNDEFINED;
   3451             p[11] = VA_BITS16_UNDEFINED;
   3452             p[12] = VA_BITS16_UNDEFINED;
   3453             p[13] = VA_BITS16_UNDEFINED;
   3454             p[14] = VA_BITS16_UNDEFINED;
   3455             p[15] = VA_BITS16_UNDEFINED;
   3456             p[16] = VA_BITS16_UNDEFINED;
   3457             p[17] = VA_BITS16_UNDEFINED;
   3458             p[18] = VA_BITS16_UNDEFINED;
   3459             p[19] = VA_BITS16_UNDEFINED;
   3460             p[20] = VA_BITS16_UNDEFINED;
   3461             p[21] = VA_BITS16_UNDEFINED;
   3462             p[22] = VA_BITS16_UNDEFINED;
   3463             p[23] = VA_BITS16_UNDEFINED;
   3464             p[24] = VA_BITS16_UNDEFINED;
   3465             p[25] = VA_BITS16_UNDEFINED;
   3466             p[26] = VA_BITS16_UNDEFINED;
   3467             p[27] = VA_BITS16_UNDEFINED;
   3468             p[28] = VA_BITS16_UNDEFINED;
   3469             p[29] = VA_BITS16_UNDEFINED;
   3470             p[30] = VA_BITS16_UNDEFINED;
   3471             p[31] = VA_BITS16_UNDEFINED;
   3472             p[32] = VA_BITS16_UNDEFINED;
   3473             p[33] = VA_BITS16_UNDEFINED;
   3474             p[34] = VA_BITS16_UNDEFINED;
   3475             p[35] = VA_BITS16_UNDEFINED;
   3476             if (UNLIKELY( MC_(clo_mc_level) == 3 )) {
   3477                set_aligned_word64_Origin_to_undef( base + 8 * 0, otag );
   3478                set_aligned_word64_Origin_to_undef( base + 8 * 1, otag );
   3479                set_aligned_word64_Origin_to_undef( base + 8 * 2, otag );
   3480                set_aligned_word64_Origin_to_undef( base + 8 * 3, otag );
   3481                set_aligned_word64_Origin_to_undef( base + 8 * 4, otag );
   3482                set_aligned_word64_Origin_to_undef( base + 8 * 5, otag );
   3483                set_aligned_word64_Origin_to_undef( base + 8 * 6, otag );
   3484                set_aligned_word64_Origin_to_undef( base + 8 * 7, otag );
   3485                set_aligned_word64_Origin_to_undef( base + 8 * 8, otag );
   3486                set_aligned_word64_Origin_to_undef( base + 8 * 9, otag );
   3487                set_aligned_word64_Origin_to_undef( base + 8 * 10, otag );
   3488                set_aligned_word64_Origin_to_undef( base + 8 * 11, otag );
   3489                set_aligned_word64_Origin_to_undef( base + 8 * 12, otag );
   3490                set_aligned_word64_Origin_to_undef( base + 8 * 13, otag );
   3491                set_aligned_word64_Origin_to_undef( base + 8 * 14, otag );
   3492                set_aligned_word64_Origin_to_undef( base + 8 * 15, otag );
   3493                set_aligned_word64_Origin_to_undef( base + 8 * 16, otag );
   3494                set_aligned_word64_Origin_to_undef( base + 8 * 17, otag );
   3495                set_aligned_word64_Origin_to_undef( base + 8 * 18, otag );
   3496                set_aligned_word64_Origin_to_undef( base + 8 * 19, otag );
   3497                set_aligned_word64_Origin_to_undef( base + 8 * 20, otag );
   3498                set_aligned_word64_Origin_to_undef( base + 8 * 21, otag );
   3499                set_aligned_word64_Origin_to_undef( base + 8 * 22, otag );
   3500                set_aligned_word64_Origin_to_undef( base + 8 * 23, otag );
   3501                set_aligned_word64_Origin_to_undef( base + 8 * 24, otag );
   3502                set_aligned_word64_Origin_to_undef( base + 8 * 25, otag );
   3503                set_aligned_word64_Origin_to_undef( base + 8 * 26, otag );
   3504                set_aligned_word64_Origin_to_undef( base + 8 * 27, otag );
   3505                set_aligned_word64_Origin_to_undef( base + 8 * 28, otag );
   3506                set_aligned_word64_Origin_to_undef( base + 8 * 29, otag );
   3507                set_aligned_word64_Origin_to_undef( base + 8 * 30, otag );
   3508                set_aligned_word64_Origin_to_undef( base + 8 * 31, otag );
   3509                set_aligned_word64_Origin_to_undef( base + 8 * 32, otag );
   3510                set_aligned_word64_Origin_to_undef( base + 8 * 33, otag );
   3511                set_aligned_word64_Origin_to_undef( base + 8 * 34, otag );
   3512                set_aligned_word64_Origin_to_undef( base + 8 * 35, otag );
   3513             }
   3514             return;
   3515          }
   3516       }
   3517    }
   3518 
   3519    /* else fall into slow case */
   3520    MC_(make_mem_undefined_w_otag)(base, len, otag);
   3521 }
   3522 
   3523 
   3524 /*------------------------------------------------------------*/
   3525 /*--- Checking memory                                      ---*/
   3526 /*------------------------------------------------------------*/
   3527 
   3528 typedef
   3529    enum {
   3530       MC_Ok = 5,
   3531       MC_AddrErr = 6,
   3532       MC_ValueErr = 7
   3533    }
   3534    MC_ReadResult;
   3535 
   3536 
   3537 /* Check permissions for address range.  If inadequate permissions
   3538    exist, *bad_addr is set to the offending address, so the caller can
   3539    know what it is. */
   3540 
   3541 /* Returns True if [a .. a+len) is not addressable.  Otherwise,
   3542    returns False, and if bad_addr is non-NULL, sets *bad_addr to
   3543    indicate the lowest failing address.  Functions below are
   3544    similar. */
   3545 Bool MC_(check_mem_is_noaccess) ( Addr a, SizeT len, Addr* bad_addr )
   3546 {
   3547    SizeT i;
   3548    UWord vabits2;
   3549 
   3550    PROF_EVENT(60, "check_mem_is_noaccess");
   3551    for (i = 0; i < len; i++) {
   3552       PROF_EVENT(61, "check_mem_is_noaccess(loop)");
   3553       vabits2 = get_vabits2(a);
   3554       if (VA_BITS2_NOACCESS != vabits2) {
   3555          if (bad_addr != NULL) *bad_addr = a;
   3556          return False;
   3557       }
   3558       a++;
   3559    }
   3560    return True;
   3561 }
   3562 
   3563 static Bool is_mem_addressable ( Addr a, SizeT len,
   3564                                  /*OUT*/Addr* bad_addr )
   3565 {
   3566    SizeT i;
   3567    UWord vabits2;
   3568 
   3569    PROF_EVENT(62, "is_mem_addressable");
   3570    for (i = 0; i < len; i++) {
   3571       PROF_EVENT(63, "is_mem_addressable(loop)");
   3572       vabits2 = get_vabits2(a);
   3573       if (VA_BITS2_NOACCESS == vabits2) {
   3574          if (bad_addr != NULL) *bad_addr = a;
   3575          return False;
   3576       }
   3577       a++;
   3578    }
   3579    return True;
   3580 }
   3581 
   3582 static MC_ReadResult is_mem_defined ( Addr a, SizeT len,
   3583                                       /*OUT*/Addr* bad_addr,
   3584                                       /*OUT*/UInt* otag )
   3585 {
   3586    SizeT i;
   3587    UWord vabits2;
   3588 
   3589    PROF_EVENT(64, "is_mem_defined");
   3590    DEBUG("is_mem_defined\n");
   3591 
   3592    if (otag)     *otag = 0;
   3593    if (bad_addr) *bad_addr = 0;
   3594    for (i = 0; i < len; i++) {
   3595       PROF_EVENT(65, "is_mem_defined(loop)");
   3596       vabits2 = get_vabits2(a);
   3597       if (VA_BITS2_DEFINED != vabits2) {
   3598          // Error!  Nb: Report addressability errors in preference to
    3599          // definedness errors.  And don't report definedness errors unless
   3600          // --undef-value-errors=yes.
   3601          if (bad_addr) {
   3602             *bad_addr = a;
   3603          }
   3604          if (VA_BITS2_NOACCESS == vabits2) {
   3605             return MC_AddrErr;
   3606          }
   3607          if (MC_(clo_mc_level) >= 2) {
   3608             if (otag && MC_(clo_mc_level) == 3) {
   3609                *otag = MC_(helperc_b_load1)( a );
   3610             }
   3611             return MC_ValueErr;
   3612          }
   3613       }
   3614       a++;
   3615    }
   3616    return MC_Ok;
   3617 }
   3618 
   3619 
   3620 /* Like is_mem_defined but doesn't give up at the first uninitialised
   3621    byte -- the entire range is always checked.  This is important for
   3622    detecting errors in the case where a checked range strays into
   3623    invalid memory, but that fact is not detected by the ordinary
   3624    is_mem_defined(), because of an undefined section that precedes the
   3625    out of range section, possibly as a result of an alignment hole in
   3626    the checked data.  This version always checks the entire range and
    3627    can report both a definedness and an accessibility error, if
   3628    necessary. */
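         /* For illustration (a hypothetical client-side scenario, not part
            of the tool): suppose a partially initialised struct sits right
            at the end of a mapping and the client passes a few bytes too
            many to a syscall:
                struct S { char c; int i; } *p = ...;  // p->i never written
                write(fd, p, sizeof(*p) + 16);         // 16 bytes past the map
            The ordinary is_mem_defined() gives up at the first non-defined
            byte (the padding after 'c' or the uninitialised 'i') and reports
            only a value error; the version below keeps scanning and can also
            report that the trailing 16 bytes are unaddressable. */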
   3629 static void is_mem_defined_comprehensive (
   3630                Addr a, SizeT len,
   3631                /*OUT*/Bool* errorV,    /* is there a definedness err? */
   3632                /*OUT*/Addr* bad_addrV, /* if so where? */
   3633                /*OUT*/UInt* otagV,     /* and what's its otag? */
   3634                /*OUT*/Bool* errorA,    /* is there an addressability err? */
   3635                /*OUT*/Addr* bad_addrA  /* if so where? */
   3636             )
   3637 {
   3638    SizeT i;
   3639    UWord vabits2;
   3640    Bool  already_saw_errV = False;
   3641 
   3642    PROF_EVENT(64, "is_mem_defined"); // fixme
   3643    DEBUG("is_mem_defined_comprehensive\n");
   3644 
   3645    tl_assert(!(*errorV || *errorA));
   3646 
   3647    for (i = 0; i < len; i++) {
   3648       PROF_EVENT(65, "is_mem_defined(loop)"); // fixme
   3649       vabits2 = get_vabits2(a);
   3650       switch (vabits2) {
   3651          case VA_BITS2_DEFINED:
   3652             a++;
   3653             break;
   3654          case VA_BITS2_UNDEFINED:
   3655          case VA_BITS2_PARTDEFINED:
   3656             if (!already_saw_errV) {
   3657                *errorV    = True;
   3658                *bad_addrV = a;
   3659                if (MC_(clo_mc_level) == 3) {
   3660                   *otagV = MC_(helperc_b_load1)( a );
   3661                } else {
   3662                   *otagV = 0;
   3663                }
   3664                already_saw_errV = True;
   3665             }
   3666             a++; /* keep going */
   3667             break;
   3668          case VA_BITS2_NOACCESS:
   3669             *errorA    = True;
   3670             *bad_addrA = a;
   3671             return; /* give up now. */
   3672          default:
   3673             tl_assert(0);
   3674       }
   3675    }
   3676 }
   3677 
   3678 
   3679 /* Check a zero-terminated ascii string.  Tricky -- don't want to
   3680    examine the actual bytes, to find the end, until we're sure it is
   3681    safe to do so. */
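         /* For example (an illustrative client-side sketch, not part of the
            tool): if a client builds a pathname without a terminating zero,
                char* p = malloc(4);
                memcpy(p, "abcd", 4);        // no trailing '\0'
                open(p, O_RDONLY);
            the loop below must check the A/V state of each byte before
            dereferencing it; otherwise the search for the terminator would
            itself run off the end of the allocation. */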
   3682 
    3683 static MC_ReadResult mc_is_defined_asciiz ( Addr a, Addr* bad_addr, UInt* otag )
   3684 {
   3685    UWord vabits2;
   3686 
   3687    PROF_EVENT(66, "mc_is_defined_asciiz");
   3688    DEBUG("mc_is_defined_asciiz\n");
   3689 
   3690    if (otag)     *otag = 0;
   3691    if (bad_addr) *bad_addr = 0;
   3692    while (True) {
   3693       PROF_EVENT(67, "mc_is_defined_asciiz(loop)");
   3694       vabits2 = get_vabits2(a);
   3695       if (VA_BITS2_DEFINED != vabits2) {
   3696          // Error!  Nb: Report addressability errors in preference to
    3697          // definedness errors.  And don't report definedness errors unless
   3698          // --undef-value-errors=yes.
   3699          if (bad_addr) {
   3700             *bad_addr = a;
   3701          }
   3702          if (VA_BITS2_NOACCESS == vabits2) {
   3703             return MC_AddrErr;
   3704          }
   3705          if (MC_(clo_mc_level) >= 2) {
   3706             if (otag && MC_(clo_mc_level) == 3) {
   3707                *otag = MC_(helperc_b_load1)( a );
   3708             }
   3709             return MC_ValueErr;
   3710          }
   3711       }
   3712       /* Ok, a is safe to read. */
   3713       if (* ((UChar*)a) == 0) {
   3714          return MC_Ok;
   3715       }
   3716       a++;
   3717    }
   3718 }
   3719 
   3720 
   3721 /*------------------------------------------------------------*/
   3722 /*--- Memory event handlers                                ---*/
   3723 /*------------------------------------------------------------*/
   3724 
   3725 static
   3726 void check_mem_is_addressable ( CorePart part, ThreadId tid, Char* s,
   3727                                 Addr base, SizeT size )
   3728 {
   3729    Addr bad_addr;
   3730    Bool ok = is_mem_addressable ( base, size, &bad_addr );
   3731 
   3732    if (!ok) {
   3733       switch (part) {
   3734       case Vg_CoreSysCall:
   3735          MC_(record_memparam_error) ( tid, bad_addr,
   3736                                       /*isAddrErr*/True, s, 0/*otag*/ );
   3737          break;
   3738 
   3739       case Vg_CoreSignal:
   3740          MC_(record_core_mem_error)( tid, s );
   3741          break;
   3742 
   3743       default:
   3744          VG_(tool_panic)("check_mem_is_addressable: unexpected CorePart");
   3745       }
   3746    }
   3747 }
   3748 
   3749 static
   3750 void check_mem_is_defined ( CorePart part, ThreadId tid, Char* s,
   3751                             Addr base, SizeT size )
   3752 {
   3753    UInt otag = 0;
   3754    Addr bad_addr;
   3755    MC_ReadResult res = is_mem_defined ( base, size, &bad_addr, &otag );
   3756 
   3757    if (MC_Ok != res) {
   3758       Bool isAddrErr = ( MC_AddrErr == res ? True : False );
   3759 
   3760       switch (part) {
   3761       case Vg_CoreSysCall:
   3762          MC_(record_memparam_error) ( tid, bad_addr, isAddrErr, s,
   3763                                       isAddrErr ? 0 : otag );
   3764          break;
   3765 
   3766       case Vg_CoreSysCallArgInMem:
   3767          MC_(record_regparam_error) ( tid, s, otag );
   3768          break;
   3769 
   3770       /* If we're being asked to jump to a silly address, record an error
   3771          message before potentially crashing the entire system. */
   3772       case Vg_CoreTranslate:
   3773          MC_(record_jump_error)( tid, bad_addr );
   3774          break;
   3775 
   3776       default:
   3777          VG_(tool_panic)("check_mem_is_defined: unexpected CorePart");
   3778       }
   3779    }
   3780 }
   3781 
   3782 static
   3783 void check_mem_is_defined_asciiz ( CorePart part, ThreadId tid,
   3784                                    Char* s, Addr str )
   3785 {
   3786    MC_ReadResult res;
   3787    Addr bad_addr = 0;   // shut GCC up
   3788    UInt otag = 0;
   3789 
   3790    tl_assert(part == Vg_CoreSysCall);
   3791    res = mc_is_defined_asciiz ( (Addr)str, &bad_addr, &otag );
   3792    if (MC_Ok != res) {
   3793       Bool isAddrErr = ( MC_AddrErr == res ? True : False );
   3794       MC_(record_memparam_error) ( tid, bad_addr, isAddrErr, s,
   3795                                    isAddrErr ? 0 : otag );
   3796    }
   3797 }
   3798 
   3799 /* Handling of mmap and mprotect is not as simple as it seems.
   3800 
   3801    The underlying semantics are that memory obtained from mmap is
   3802    always initialised, but may be inaccessible.  And changes to the
   3803    protection of memory do not change its contents and hence not its
   3804    definedness state.  Problem is we can't model
   3805    inaccessible-but-with-some-definedness state; once we mark memory
   3806    as inaccessible we lose all info about definedness, and so can't
   3807    restore that if it is later made accessible again.
   3808 
   3809    One obvious thing to do is this:
   3810 
   3811       mmap/mprotect NONE  -> noaccess
   3812       mmap/mprotect other -> defined
   3813 
   3814    The problem case here is: taking accessible memory, writing
   3815    uninitialised data to it, mprotecting it NONE and later mprotecting
   3816    it back to some accessible state causes the undefinedness to be
   3817    lost.
   3818 
   3819    A better proposal is:
   3820 
   3821      (1) mmap NONE       ->  make noaccess
   3822      (2) mmap other      ->  make defined
   3823 
   3824      (3) mprotect NONE   ->  # no change
   3825      (4) mprotect other  ->  change any "noaccess" to "defined"
   3826 
   3827    (2) is OK because memory newly obtained from mmap really is defined
   3828        (zeroed out by the kernel -- doing anything else would
   3829        constitute a massive security hole.)
   3830 
   3831    (1) is OK because the only way to make the memory usable is via
   3832        (4), in which case we also wind up correctly marking it all as
   3833        defined.
   3834 
   3835    (3) is the weak case.  We choose not to change memory state.
   3836        (presumably the range is in some mixture of "defined" and
   3837        "undefined", viz, accessible but with arbitrary V bits).  Doing
   3838        nothing means we retain the V bits, so that if the memory is
   3839        later mprotected "other", the V bits remain unchanged, so there
   3840        can be no false negatives.  The bad effect is that if there's
   3841        an access in the area, then MC cannot warn; but at least we'll
   3842        get a SEGV to show, so it's better than nothing.
   3843 
   3844    Consider the sequence (3) followed by (4).  Any memory that was
   3845    "defined" or "undefined" previously retains its state (as
   3846    required).  Any memory that was "noaccess" before can only have
   3847    been made that way by (1), and so it's OK to change it to
   3848    "defined".
   3849 
   3850    See https://bugs.kde.org/show_bug.cgi?id=205541
   3851    and https://bugs.kde.org/show_bug.cgi?id=210268
   3852 */
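         /* A client-side sketch of the sequence that rules (3) and (4) are
            aimed at (illustrative only; plain POSIX calls, error checking
            omitted):
                char  junk[64];                              // uninitialised
                char* p = mmap(NULL, 4096, PROT_READ|PROT_WRITE,
                               MAP_PRIVATE|MAP_ANONYMOUS, -1, 0); // (2) defined
                memcpy(p, junk, 64);                  // p[0..63] now undefined
                mprotect(p, 4096, PROT_NONE);         // (3) V bits untouched
                mprotect(p, 4096, PROT_READ);         // (4) noaccess->defined only
            Afterwards p[0..63] is still tracked as undefined, so a later use
            of those bytes can still be reported. */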
   3853 static
   3854 void mc_new_mem_mmap ( Addr a, SizeT len, Bool rr, Bool ww, Bool xx,
   3855                        ULong di_handle )
   3856 {
   3857    if (rr || ww || xx) {
   3858       /* (2) mmap/mprotect other -> defined */
   3859       MC_(make_mem_defined)(a, len);
   3860    } else {
   3861       /* (1) mmap/mprotect NONE  -> noaccess */
   3862       MC_(make_mem_noaccess)(a, len);
   3863    }
   3864 }
   3865 
   3866 static
   3867 void mc_new_mem_mprotect ( Addr a, SizeT len, Bool rr, Bool ww, Bool xx )
   3868 {
   3869    if (rr || ww || xx) {
   3870       /* (4) mprotect other  ->  change any "noaccess" to "defined" */
   3871       make_mem_defined_if_noaccess(a, len);
   3872    } else {
   3873       /* (3) mprotect NONE   ->  # no change */
   3874       /* do nothing */
   3875    }
   3876 }
   3877 
   3878 
   3879 static
   3880 void mc_new_mem_startup( Addr a, SizeT len,
   3881                          Bool rr, Bool ww, Bool xx, ULong di_handle )
   3882 {
    3883    // Code is defined; initialised variables get put in the data segment
    3884    // and are defined; and uninitialised variables get put in the bss
    3885    // segment and are auto-zeroed (and so defined).
   3886    //
   3887    // It's possible that there will be padding between global variables.
   3888    // This will also be auto-zeroed, and marked as defined by Memcheck.  If
   3889    // a program uses it, Memcheck will not complain.  This is arguably a
   3890    // false negative, but it's a grey area -- the behaviour is defined (the
   3891    // padding is zeroed) but it's probably not what the user intended.  And
   3892    // we can't avoid it.
   3893    //
   3894    // Note: we generally ignore RWX permissions, because we can't track them
   3895    // without requiring more than one A bit which would slow things down a
   3896    // lot.  But on Darwin the 0th page is mapped but !R and !W and !X.
   3897    // So we mark any such pages as "unaddressable".
   3898    DEBUG("mc_new_mem_startup(%#lx, %llu, rr=%u, ww=%u, xx=%u)\n",
   3899          a, (ULong)len, rr, ww, xx);
   3900    mc_new_mem_mmap(a, len, rr, ww, xx, di_handle);
   3901 }
   3902 
   3903 static
   3904 void mc_post_mem_write(CorePart part, ThreadId tid, Addr a, SizeT len)
   3905 {
   3906    MC_(make_mem_defined)(a, len);
   3907 }
   3908 
   3909 
   3910 /*------------------------------------------------------------*/
   3911 /*--- Register event handlers                              ---*/
   3912 /*------------------------------------------------------------*/
   3913 
   3914 /* Try and get a nonzero origin for the guest state section of thread
   3915    tid characterised by (offset,size).  Return 0 if nothing to show
   3916    for it. */
   3917 static UInt mb_get_origin_for_guest_offset ( ThreadId tid,
   3918                                              Int offset, SizeT size )
   3919 {
   3920    Int   sh2off;
   3921    UInt  area[3];
   3922    UInt  otag;
   3923    sh2off = MC_(get_otrack_shadow_offset)( offset, size );
   3924    if (sh2off == -1)
   3925       return 0;  /* This piece of guest state is not tracked */
   3926    tl_assert(sh2off >= 0);
   3927    tl_assert(0 == (sh2off % 4));
   3928    area[0] = 0x31313131;
   3929    area[2] = 0x27272727;
   3930    VG_(get_shadow_regs_area)( tid, (UChar *)&area[1], 2/*shadowno*/,sh2off,4 );
   3931    tl_assert(area[0] == 0x31313131);
   3932    tl_assert(area[2] == 0x27272727);
   3933    otag = area[1];
   3934    return otag;
   3935 }
   3936 
   3937 
   3938 /* When some chunk of guest state is written, mark the corresponding
   3939    shadow area as valid.  This is used to initialise arbitrarily large
   3940    chunks of guest state, hence the _SIZE value, which has to be as
   3941    big as the biggest guest state.
   3942 */
   3943 static void mc_post_reg_write ( CorePart part, ThreadId tid,
   3944                                 PtrdiffT offset, SizeT size)
   3945 {
   3946 #  define MAX_REG_WRITE_SIZE 1696
   3947    UChar area[MAX_REG_WRITE_SIZE];
   3948    tl_assert(size <= MAX_REG_WRITE_SIZE);
   3949    VG_(memset)(area, V_BITS8_DEFINED, size);
   3950    VG_(set_shadow_regs_area)( tid, 1/*shadowNo*/,offset,size, area );
   3951 #  undef MAX_REG_WRITE_SIZE
   3952 }
   3953 
   3954 static
   3955 void mc_post_reg_write_clientcall ( ThreadId tid,
   3956                                     PtrdiffT offset, SizeT size, Addr f)
   3957 {
   3958    mc_post_reg_write(/*dummy*/0, tid, offset, size);
   3959 }
   3960 
   3961 /* Look at the definedness of the guest's shadow state for
    3962    [offset, offset+size).  If any part of that is undefined, record
   3963    a parameter error.
   3964 */
   3965 static void mc_pre_reg_read ( CorePart part, ThreadId tid, Char* s,
   3966                               PtrdiffT offset, SizeT size)
   3967 {
   3968    Int   i;
   3969    Bool  bad;
   3970    UInt  otag;
   3971 
   3972    UChar area[16];
   3973    tl_assert(size <= 16);
   3974 
   3975    VG_(get_shadow_regs_area)( tid, area, 1/*shadowNo*/,offset,size );
   3976 
   3977    bad = False;
   3978    for (i = 0; i < size; i++) {
   3979       if (area[i] != V_BITS8_DEFINED) {
   3980          bad = True;
   3981          break;
   3982       }
   3983    }
   3984 
   3985    if (!bad)
   3986       return;
   3987 
   3988    /* We've found some undefinedness.  See if we can also find an
   3989       origin for it. */
   3990    otag = mb_get_origin_for_guest_offset( tid, offset, size );
   3991    MC_(record_regparam_error) ( tid, s, otag );
   3992 }
   3993 
   3994 
   3995 /*------------------------------------------------------------*/
   3996 /*--- Functions called directly from generated code:       ---*/
   3997 /*--- Load/store handlers.                                 ---*/
   3998 /*------------------------------------------------------------*/
   3999 
   4000 /* Types:  LOADV32, LOADV16, LOADV8 are:
   4001                UWord fn ( Addr a )
   4002    so they return 32-bits on 32-bit machines and 64-bits on
   4003    64-bit machines.  Addr has the same size as a host word.
   4004 
   4005    LOADV64 is always  ULong fn ( Addr a )
   4006 
   4007    Similarly for STOREV8, STOREV16, STOREV32, the supplied vbits
   4008    are a UWord, and for STOREV64 they are a ULong.
   4009 */
   4010 
   4011 /* If any part of '_a' indicated by the mask is 1, either '_a' is not
   4012    naturally '_sz/8'-aligned, or it exceeds the range covered by the
   4013    primary map.  This is all very tricky (and important!), so let's
   4014    work through the maths by hand (below), *and* assert for these
   4015    values at startup. */
   4016 #define MASK(_szInBytes) \
   4017    ( ~((0x10000UL-(_szInBytes)) | ((N_PRIMARY_MAP-1) << 16)) )
   4018 
   4019 /* MASK only exists so as to define this macro. */
   4020 #define UNALIGNED_OR_HIGH(_a,_szInBits) \
   4021    ((_a) & MASK((_szInBits>>3)))
   4022 
   4023 /* On a 32-bit machine:
   4024 
   4025    N_PRIMARY_BITS          == 16, so
   4026    N_PRIMARY_MAP           == 0x10000, so
   4027    N_PRIMARY_MAP-1         == 0xFFFF, so
   4028    (N_PRIMARY_MAP-1) << 16 == 0xFFFF0000, and so
   4029 
   4030    MASK(1) = ~ ( (0x10000 - 1) | 0xFFFF0000 )
   4031            = ~ ( 0xFFFF | 0xFFFF0000 )
   4032            = ~ 0xFFFF'FFFF
   4033            = 0
   4034 
   4035    MASK(2) = ~ ( (0x10000 - 2) | 0xFFFF0000 )
   4036            = ~ ( 0xFFFE | 0xFFFF0000 )
   4037            = ~ 0xFFFF'FFFE
   4038            = 1
   4039 
   4040    MASK(4) = ~ ( (0x10000 - 4) | 0xFFFF0000 )
   4041            = ~ ( 0xFFFC | 0xFFFF0000 )
   4042            = ~ 0xFFFF'FFFC
   4043            = 3
   4044 
   4045    MASK(8) = ~ ( (0x10000 - 8) | 0xFFFF0000 )
   4046            = ~ ( 0xFFF8 | 0xFFFF0000 )
   4047            = ~ 0xFFFF'FFF8
   4048            = 7
   4049 
   4050    Hence in the 32-bit case, "a & MASK(1/2/4/8)" is a nonzero value
   4051    precisely when a is not 1/2/4/8-bytes aligned.  And obviously, for
   4052    the 1-byte alignment case, it is always a zero value, since MASK(1)
   4053    is zero.  All as expected.
   4054 
   4055    On a 64-bit machine, it's more complex, since we're testing
   4056    simultaneously for misalignment and for the address being at or
   4057    above 32G:
   4058 
   4059    N_PRIMARY_BITS          == 19, so
   4060    N_PRIMARY_MAP           == 0x80000, so
   4061    N_PRIMARY_MAP-1         == 0x7FFFF, so
   4062    (N_PRIMARY_MAP-1) << 16 == 0x7FFFF'0000, and so
   4063 
   4064    MASK(1) = ~ ( (0x10000 - 1) | 0x7FFFF'0000 )
   4065            = ~ ( 0xFFFF | 0x7FFFF'0000 )
   4066            = ~ 0x7FFFF'FFFF
   4067            = 0xFFFF'FFF8'0000'0000
   4068 
   4069    MASK(2) = ~ ( (0x10000 - 2) | 0x7FFFF'0000 )
   4070            = ~ ( 0xFFFE | 0x7FFFF'0000 )
   4071            = ~ 0x7FFFF'FFFE
   4072            = 0xFFFF'FFF8'0000'0001
   4073 
   4074    MASK(4) = ~ ( (0x10000 - 4) | 0x7FFFF'0000 )
   4075            = ~ ( 0xFFFC | 0x7FFFF'0000 )
   4076            = ~ 0x7FFFF'FFFC
   4077            = 0xFFFF'FFF8'0000'0003
   4078 
   4079    MASK(8) = ~ ( (0x10000 - 8) | 0x7FFFF'0000 )
   4080            = ~ ( 0xFFF8 | 0x7FFFF'0000 )
   4081            = ~ 0x7FFFF'FFF8
   4082            = 0xFFFF'FFF8'0000'0007
   4083 */
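         /* A minimal sketch (not the tool's actual startup code) of the kind
            of assertion referred to above, using the 64-bit values derived
            here (so it assumes N_PRIMARY_MAP == 0x80000):
                tl_assert(MASK(1) == 0xFFFFFFF800000000ULL);
                tl_assert(MASK(2) == 0xFFFFFFF800000001ULL);
                tl_assert(MASK(4) == 0xFFFFFFF800000003ULL);
                tl_assert(MASK(8) == 0xFFFFFFF800000007ULL);
            The corresponding 32-bit values are 0, 1, 3 and 7. */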
   4084 
   4085 
   4086 /* ------------------------ Size = 8 ------------------------ */
   4087 
   4088 static INLINE
   4089 ULong mc_LOADV64 ( Addr a, Bool isBigEndian )
   4090 {
   4091    PROF_EVENT(200, "mc_LOADV64");
   4092 
   4093 #ifndef PERF_FAST_LOADV
   4094    return mc_LOADVn_slow( a, 64, isBigEndian );
   4095 #else
   4096    {
   4097       UWord   sm_off16, vabits16;
   4098       SecMap* sm;
   4099 
   4100       if (UNLIKELY( UNALIGNED_OR_HIGH(a,64) )) {
   4101          PROF_EVENT(201, "mc_LOADV64-slow1");
   4102          return (ULong)mc_LOADVn_slow( a, 64, isBigEndian );
   4103       }
   4104 
   4105       sm       = get_secmap_for_reading_low(a);
   4106       sm_off16 = SM_OFF_16(a);
   4107       vabits16 = ((UShort*)(sm->vabits8))[sm_off16];
   4108 
   4109       // Handle common case quickly: a is suitably aligned, is mapped, and
    4110       // addressable.
   4111       // Convert V bits from compact memory form to expanded register form.
   4112       if (LIKELY(vabits16 == VA_BITS16_DEFINED)) {
   4113          return V_BITS64_DEFINED;
   4114       } else if (LIKELY(vabits16 == VA_BITS16_UNDEFINED)) {
   4115          return V_BITS64_UNDEFINED;
   4116       } else {
   4117          /* Slow case: the 8 bytes are not all-defined or all-undefined. */
   4118          PROF_EVENT(202, "mc_LOADV64-slow2");
   4119          return mc_LOADVn_slow( a, 64, isBigEndian );
   4120       }
   4121    }
   4122 #endif
   4123 }
   4124 
   4125 VG_REGPARM(1) ULong MC_(helperc_LOADV64be) ( Addr a )
   4126 {
   4127    return mc_LOADV64(a, True);
   4128 }
   4129 VG_REGPARM(1) ULong MC_(helperc_LOADV64le) ( Addr a )
   4130 {
   4131    return mc_LOADV64(a, False);
   4132 }
   4133 
   4134 
   4135 static INLINE
   4136 void mc_STOREV64 ( Addr a, ULong vbits64, Bool isBigEndian )
   4137 {
   4138    PROF_EVENT(210, "mc_STOREV64");
   4139 
   4140 #ifndef PERF_FAST_STOREV
   4141    // XXX: this slow case seems to be marginally faster than the fast case!
   4142    // Investigate further.
   4143    mc_STOREVn_slow( a, 64, vbits64, isBigEndian );
   4144 #else
   4145    {
   4146       UWord   sm_off16, vabits16;
   4147       SecMap* sm;
   4148 
   4149       if (UNLIKELY( UNALIGNED_OR_HIGH(a,64) )) {
   4150          PROF_EVENT(211, "mc_STOREV64-slow1");
   4151          mc_STOREVn_slow( a, 64, vbits64, isBigEndian );
   4152          return;
   4153       }
   4154 
   4155       sm       = get_secmap_for_reading_low(a);
   4156       sm_off16 = SM_OFF_16(a);
   4157       vabits16 = ((UShort*)(sm->vabits8))[sm_off16];
   4158 
   4159       if (LIKELY( !is_distinguished_sm(sm) &&
   4160                           (VA_BITS16_DEFINED   == vabits16 ||
   4161                            VA_BITS16_UNDEFINED == vabits16) ))
   4162       {
   4163          /* Handle common case quickly: a is suitably aligned, */
    4164          /* is mapped, and is addressable. */
   4165          // Convert full V-bits in register to compact 2-bit form.
   4166          if (V_BITS64_DEFINED == vbits64) {
   4167             ((UShort*)(sm->vabits8))[sm_off16] = (UShort)VA_BITS16_DEFINED;
   4168          } else if (V_BITS64_UNDEFINED == vbits64) {
   4169             ((UShort*)(sm->vabits8))[sm_off16] = (UShort)VA_BITS16_UNDEFINED;
   4170          } else {
   4171             /* Slow but general case -- writing partially defined bytes. */
   4172             PROF_EVENT(212, "mc_STOREV64-slow2");
   4173             mc_STOREVn_slow( a, 64, vbits64, isBigEndian );
   4174          }
   4175       } else {
   4176          /* Slow but general case. */
   4177          PROF_EVENT(213, "mc_STOREV64-slow3");
   4178          mc_STOREVn_slow( a, 64, vbits64, isBigEndian );
   4179       }
   4180    }
   4181 #endif
   4182 }
   4183 
   4184 VG_REGPARM(1) void MC_(helperc_STOREV64be) ( Addr a, ULong vbits64 )
   4185 {
   4186    mc_STOREV64(a, vbits64, True);
   4187 }
   4188 VG_REGPARM(1) void MC_(helperc_STOREV64le) ( Addr a, ULong vbits64 )
   4189 {
   4190    mc_STOREV64(a, vbits64, False);
   4191 }
   4192 
   4193 
   4194 /* ------------------------ Size = 4 ------------------------ */
   4195 
   4196 static INLINE
   4197 UWord mc_LOADV32 ( Addr a, Bool isBigEndian )
   4198 {
   4199    PROF_EVENT(220, "mc_LOADV32");
   4200 
   4201 #ifndef PERF_FAST_LOADV
   4202    return (UWord)mc_LOADVn_slow( a, 32, isBigEndian );
   4203 #else
   4204    {
   4205       UWord   sm_off, vabits8;
   4206       SecMap* sm;
   4207 
   4208       if (UNLIKELY( UNALIGNED_OR_HIGH(a,32) )) {
   4209          PROF_EVENT(221, "mc_LOADV32-slow1");
   4210          return (UWord)mc_LOADVn_slow( a, 32, isBigEndian );
   4211       }
   4212 
   4213       sm      = get_secmap_for_reading_low(a);
   4214       sm_off  = SM_OFF(a);
   4215       vabits8 = sm->vabits8[sm_off];
   4216 
   4217       // Handle common case quickly: a is suitably aligned, is mapped, and the
    4218       // entire word32 it lives in is addressable.
   4219       // Convert V bits from compact memory form to expanded register form.
   4220       // For 64-bit platforms, set the high 32 bits of retval to 1 (undefined).
   4221       // Almost certainly not necessary, but be paranoid.
   4222       if (LIKELY(vabits8 == VA_BITS8_DEFINED)) {
   4223          return ((UWord)0xFFFFFFFF00000000ULL | (UWord)V_BITS32_DEFINED);
   4224       } else if (LIKELY(vabits8 == VA_BITS8_UNDEFINED)) {
   4225          return ((UWord)0xFFFFFFFF00000000ULL | (UWord)V_BITS32_UNDEFINED);
   4226       } else {
   4227          /* Slow case: the 4 bytes are not all-defined or all-undefined. */
   4228          PROF_EVENT(222, "mc_LOADV32-slow2");
   4229          return (UWord)mc_LOADVn_slow( a, 32, isBigEndian );
   4230       }
   4231    }
   4232 #endif
   4233 }
   4234 
   4235 VG_REGPARM(1) UWord MC_(helperc_LOADV32be) ( Addr a )
   4236 {
   4237    return mc_LOADV32(a, True);
   4238 }
   4239 VG_REGPARM(1) UWord MC_(helperc_LOADV32le) ( Addr a )
   4240 {
   4241    return mc_LOADV32(a, False);
   4242 }
   4243 
   4244 
   4245 static INLINE
   4246 void mc_STOREV32 ( Addr a, UWord vbits32, Bool isBigEndian )
   4247 {
   4248    PROF_EVENT(230, "mc_STOREV32");
   4249 
   4250 #ifndef PERF_FAST_STOREV
   4251    mc_STOREVn_slow( a, 32, (ULong)vbits32, isBigEndian );
   4252 #else
   4253    {
   4254       UWord   sm_off, vabits8;
   4255       SecMap* sm;
   4256 
   4257       if (UNLIKELY( UNALIGNED_OR_HIGH(a,32) )) {
   4258          PROF_EVENT(231, "mc_STOREV32-slow1");
   4259          mc_STOREVn_slow( a, 32, (ULong)vbits32, isBigEndian );
   4260          return;
   4261       }
   4262 
   4263       sm      = get_secmap_for_reading_low(a);
   4264       sm_off  = SM_OFF(a);
   4265       vabits8 = sm->vabits8[sm_off];
   4266 
   4267       // Cleverness:  sometimes we don't have to write the shadow memory at
   4268       // all, if we can tell that what we want to write is the same as what is
   4269       // already there.  The 64/16/8 bit cases also have cleverness at this
   4270       // point, but it works a little differently to the code below.
   4271       if (V_BITS32_DEFINED == vbits32) {
   4272          if (vabits8 == (UInt)VA_BITS8_DEFINED) {
   4273             return;
   4274          } else if (!is_distinguished_sm(sm) && VA_BITS8_UNDEFINED == vabits8) {
   4275             sm->vabits8[sm_off] = (UInt)VA_BITS8_DEFINED;
   4276          } else {
   4277             // not defined/undefined, or distinguished and changing state
   4278             PROF_EVENT(232, "mc_STOREV32-slow2");
   4279             mc_STOREVn_slow( a, 32, (ULong)vbits32, isBigEndian );
   4280          }
   4281       } else if (V_BITS32_UNDEFINED == vbits32) {
   4282          if (vabits8 == (UInt)VA_BITS8_UNDEFINED) {
   4283             return;
   4284          } else if (!is_distinguished_sm(sm) && VA_BITS8_DEFINED == vabits8) {
   4285             sm->vabits8[sm_off] = (UInt)VA_BITS8_UNDEFINED;
   4286          } else {
   4287             // not defined/undefined, or distinguished and changing state
   4288             PROF_EVENT(233, "mc_STOREV32-slow3");
   4289             mc_STOREVn_slow( a, 32, (ULong)vbits32, isBigEndian );
   4290          }
   4291       } else {
   4292          // Partially defined word
   4293          PROF_EVENT(234, "mc_STOREV32-slow4");
   4294          mc_STOREVn_slow( a, 32, (ULong)vbits32, isBigEndian );
   4295       }
   4296    }
   4297 #endif
   4298 }
   4299 
   4300 VG_REGPARM(2) void MC_(helperc_STOREV32be) ( Addr a, UWord vbits32 )
   4301 {
   4302    mc_STOREV32(a, vbits32, True);
   4303 }
   4304 VG_REGPARM(2) void MC_(helperc_STOREV32le) ( Addr a, UWord vbits32 )
   4305 {
   4306    mc_STOREV32(a, vbits32, False);
   4307 }
   4308 
   4309 
   4310 /* ------------------------ Size = 2 ------------------------ */
   4311 
   4312 static INLINE
   4313 UWord mc_LOADV16 ( Addr a, Bool isBigEndian )
   4314 {
   4315    PROF_EVENT(240, "mc_LOADV16");
   4316 
   4317 #ifndef PERF_FAST_LOADV
   4318    return (UWord)mc_LOADVn_slow( a, 16, isBigEndian );
   4319 #else
   4320    {
   4321       UWord   sm_off, vabits8;
   4322       SecMap* sm;
   4323 
   4324       if (UNLIKELY( UNALIGNED_OR_HIGH(a,16) )) {
   4325          PROF_EVENT(241, "mc_LOADV16-slow1");
   4326          return (UWord)mc_LOADVn_slow( a, 16, isBigEndian );
   4327       }
   4328 
   4329       sm      = get_secmap_for_reading_low(a);
   4330       sm_off  = SM_OFF(a);
   4331       vabits8 = sm->vabits8[sm_off];
   4332       // Handle common case quickly: a is suitably aligned, is mapped, and is
    4333       // addressable.
   4334       // Convert V bits from compact memory form to expanded register form
   4335       if      (LIKELY(vabits8 == VA_BITS8_DEFINED  )) { return V_BITS16_DEFINED;   }
   4336       else if (LIKELY(vabits8 == VA_BITS8_UNDEFINED)) { return V_BITS16_UNDEFINED; }
   4337       else {
   4338          // The 4 (yes, 4) bytes are not all-defined or all-undefined, check
   4339          // the two sub-bytes.
   4340          UChar vabits4 = extract_vabits4_from_vabits8(a, vabits8);
   4341          if      (vabits4 == VA_BITS4_DEFINED  ) { return V_BITS16_DEFINED;   }
   4342          else if (vabits4 == VA_BITS4_UNDEFINED) { return V_BITS16_UNDEFINED; }
   4343          else {
   4344             /* Slow case: the two bytes are not all-defined or all-undefined. */
   4345             PROF_EVENT(242, "mc_LOADV16-slow2");
   4346             return (UWord)mc_LOADVn_slow( a, 16, isBigEndian );
   4347          }
   4348       }
   4349    }
   4350 #endif
   4351 }
   4352 
   4353 VG_REGPARM(1) UWord MC_(helperc_LOADV16be) ( Addr a )
   4354 {
   4355    return mc_LOADV16(a, True);
   4356 }
   4357 VG_REGPARM(1) UWord MC_(helperc_LOADV16le) ( Addr a )
   4358 {
   4359    return mc_LOADV16(a, False);
   4360 }
   4361 
   4362 
   4363 static INLINE
   4364 void mc_STOREV16 ( Addr a, UWord vbits16, Bool isBigEndian )
   4365 {
   4366    PROF_EVENT(250, "mc_STOREV16");
   4367 
   4368 #ifndef PERF_FAST_STOREV
   4369    mc_STOREVn_slow( a, 16, (ULong)vbits16, isBigEndian );
   4370 #else
   4371    {
   4372       UWord   sm_off, vabits8;
   4373       SecMap* sm;
   4374 
   4375       if (UNLIKELY( UNALIGNED_OR_HIGH(a,16) )) {
   4376          PROF_EVENT(251, "mc_STOREV16-slow1");
   4377          mc_STOREVn_slow( a, 16, (ULong)vbits16, isBigEndian );
   4378          return;
   4379       }
   4380 
   4381       sm      = get_secmap_for_reading_low(a);
   4382       sm_off  = SM_OFF(a);
   4383       vabits8 = sm->vabits8[sm_off];
   4384       if (LIKELY( !is_distinguished_sm(sm) &&
   4385                           (VA_BITS8_DEFINED   == vabits8 ||
   4386                            VA_BITS8_UNDEFINED == vabits8) ))
   4387       {
   4388          /* Handle common case quickly: a is suitably aligned, */
    4389          /* is mapped, and is addressable. */
   4390          // Convert full V-bits in register to compact 2-bit form.
   4391          if (V_BITS16_DEFINED == vbits16) {
   4392             insert_vabits4_into_vabits8( a, VA_BITS4_DEFINED ,
   4393                                          &(sm->vabits8[sm_off]) );
   4394          } else if (V_BITS16_UNDEFINED == vbits16) {
   4395             insert_vabits4_into_vabits8( a, VA_BITS4_UNDEFINED,
   4396                                          &(sm->vabits8[sm_off]) );
   4397          } else {
   4398             /* Slow but general case -- writing partially defined bytes. */
   4399             PROF_EVENT(252, "mc_STOREV16-slow2");
   4400             mc_STOREVn_slow( a, 16, (ULong)vbits16, isBigEndian );
   4401          }
   4402       } else {
   4403          /* Slow but general case. */
   4404          PROF_EVENT(253, "mc_STOREV16-slow3");
   4405          mc_STOREVn_slow( a, 16, (ULong)vbits16, isBigEndian );
   4406       }
   4407    }
   4408 #endif
   4409 }
   4410 
   4411 VG_REGPARM(2) void MC_(helperc_STOREV16be) ( Addr a, UWord vbits16 )
   4412 {
   4413    mc_STOREV16(a, vbits16, True);
   4414 }
   4415 VG_REGPARM(2) void MC_(helperc_STOREV16le) ( Addr a, UWord vbits16 )
   4416 {
   4417    mc_STOREV16(a, vbits16, False);
   4418 }
   4419 
   4420 
   4421 /* ------------------------ Size = 1 ------------------------ */
   4422 /* Note: endianness is irrelevant for size == 1 */
   4423 
   4424 VG_REGPARM(1)
   4425 UWord MC_(helperc_LOADV8) ( Addr a )
   4426 {
   4427    PROF_EVENT(260, "mc_LOADV8");
   4428 
   4429 #ifndef PERF_FAST_LOADV
   4430    return (UWord)mc_LOADVn_slow( a, 8, False/*irrelevant*/ );
   4431 #else
   4432    {
   4433       UWord   sm_off, vabits8;
   4434       SecMap* sm;
   4435 
   4436       if (UNLIKELY( UNALIGNED_OR_HIGH(a,8) )) {
   4437          PROF_EVENT(261, "mc_LOADV8-slow1");
   4438          return (UWord)mc_LOADVn_slow( a, 8, False/*irrelevant*/ );
   4439       }
   4440 
   4441       sm      = get_secmap_for_reading_low(a);
   4442       sm_off  = SM_OFF(a);
   4443       vabits8 = sm->vabits8[sm_off];
   4444       // Convert V bits from compact memory form to expanded register form
   4445       // Handle common case quickly: a is mapped, and the entire
    4446       // word32 it lives in is addressable.
   4447       if      (LIKELY(vabits8 == VA_BITS8_DEFINED  )) { return V_BITS8_DEFINED;   }
   4448       else if (LIKELY(vabits8 == VA_BITS8_UNDEFINED)) { return V_BITS8_UNDEFINED; }
   4449       else {
   4450          // The 4 (yes, 4) bytes are not all-defined or all-undefined, check
   4451          // the single byte.
   4452          UChar vabits2 = extract_vabits2_from_vabits8(a, vabits8);
   4453          if      (vabits2 == VA_BITS2_DEFINED  ) { return V_BITS8_DEFINED;   }
   4454          else if (vabits2 == VA_BITS2_UNDEFINED) { return V_BITS8_UNDEFINED; }
   4455          else {
   4456             /* Slow case: the byte is not all-defined or all-undefined. */
   4457             PROF_EVENT(262, "mc_LOADV8-slow2");
   4458             return (UWord)mc_LOADVn_slow( a, 8, False/*irrelevant*/ );
   4459          }
   4460       }
   4461    }
   4462 #endif
   4463 }
   4464 
   4465 
   4466 VG_REGPARM(2)
   4467 void MC_(helperc_STOREV8) ( Addr a, UWord vbits8 )
   4468 {
   4469    PROF_EVENT(270, "mc_STOREV8");
   4470 
   4471 #ifndef PERF_FAST_STOREV
   4472    mc_STOREVn_slow( a, 8, (ULong)vbits8, False/*irrelevant*/ );
   4473 #else
   4474    {
   4475       UWord   sm_off, vabits8;
   4476       SecMap* sm;
   4477 
   4478       if (UNLIKELY( UNALIGNED_OR_HIGH(a,8) )) {
   4479          PROF_EVENT(271, "mc_STOREV8-slow1");
   4480          mc_STOREVn_slow( a, 8, (ULong)vbits8, False/*irrelevant*/ );
   4481          return;
   4482       }
   4483 
   4484       sm      = get_secmap_for_reading_low(a);
   4485       sm_off  = SM_OFF(a);
   4486       vabits8 = sm->vabits8[sm_off];
   4487       if (LIKELY
   4488             ( !is_distinguished_sm(sm) &&
   4489               ( (VA_BITS8_DEFINED == vabits8 || VA_BITS8_UNDEFINED == vabits8)
   4490              || (VA_BITS2_NOACCESS != extract_vabits2_from_vabits8(a, vabits8))
   4491               )
   4492             )
   4493          )
   4494       {
   4495          /* Handle common case quickly: a is mapped, the entire word32 it
    4496             lives in is addressable. */
   4497          // Convert full V-bits in register to compact 2-bit form.
   4498          if (V_BITS8_DEFINED == vbits8) {
   4499             insert_vabits2_into_vabits8( a, VA_BITS2_DEFINED,
   4500                                           &(sm->vabits8[sm_off]) );
   4501          } else if (V_BITS8_UNDEFINED == vbits8) {
   4502             insert_vabits2_into_vabits8( a, VA_BITS2_UNDEFINED,
   4503                                           &(sm->vabits8[sm_off]) );
   4504          } else {
   4505             /* Slow but general case -- writing partially defined bytes. */
   4506             PROF_EVENT(272, "mc_STOREV8-slow2");
   4507             mc_STOREVn_slow( a, 8, (ULong)vbits8, False/*irrelevant*/ );
   4508          }
   4509       } else {
   4510          /* Slow but general case. */
   4511          PROF_EVENT(273, "mc_STOREV8-slow3");
   4512          mc_STOREVn_slow( a, 8, (ULong)vbits8, False/*irrelevant*/ );
   4513       }
   4514    }
   4515 #endif
   4516 }
   4517 
   4518 
   4519 /*------------------------------------------------------------*/
   4520 /*--- Functions called directly from generated code:       ---*/
   4521 /*--- Value-check failure handlers.                        ---*/
   4522 /*------------------------------------------------------------*/
   4523 
   4524 /* Call these ones when an origin is available ... */
   4525 VG_REGPARM(1)
   4526 void MC_(helperc_value_check0_fail_w_o) ( UWord origin ) {
   4527    MC_(record_cond_error) ( VG_(get_running_tid)(), (UInt)origin );
   4528 }
   4529 
   4530 VG_REGPARM(1)
   4531 void MC_(helperc_value_check1_fail_w_o) ( UWord origin ) {
   4532    MC_(record_value_error) ( VG_(get_running_tid)(), 1, (UInt)origin );
   4533 }
   4534 
   4535 VG_REGPARM(1)
   4536 void MC_(helperc_value_check4_fail_w_o) ( UWord origin ) {
   4537    MC_(record_value_error) ( VG_(get_running_tid)(), 4, (UInt)origin );
   4538 }
   4539 
   4540 VG_REGPARM(1)
   4541 void MC_(helperc_value_check8_fail_w_o) ( UWord origin ) {
   4542    MC_(record_value_error) ( VG_(get_running_tid)(), 8, (UInt)origin );
   4543 }
   4544 
   4545 VG_REGPARM(2)
   4546 void MC_(helperc_value_checkN_fail_w_o) ( HWord sz, UWord origin ) {
   4547    MC_(record_value_error) ( VG_(get_running_tid)(), (Int)sz, (UInt)origin );
   4548 }
   4549 
   4550 /* ... and these when an origin isn't available. */
   4551 
   4552 VG_REGPARM(0)
   4553 void MC_(helperc_value_check0_fail_no_o) ( void ) {
   4554    MC_(record_cond_error) ( VG_(get_running_tid)(), 0/*origin*/ );
   4555 }
   4556 
   4557 VG_REGPARM(0)
   4558 void MC_(helperc_value_check1_fail_no_o) ( void ) {
   4559    MC_(record_value_error) ( VG_(get_running_tid)(), 1, 0/*origin*/ );
   4560 }
   4561 
   4562 VG_REGPARM(0)
   4563 void MC_(helperc_value_check4_fail_no_o) ( void ) {
   4564    MC_(record_value_error) ( VG_(get_running_tid)(), 4, 0/*origin*/ );
   4565 }
   4566 
   4567 VG_REGPARM(0)
   4568 void MC_(helperc_value_check8_fail_no_o) ( void ) {
   4569    MC_(record_value_error) ( VG_(get_running_tid)(), 8, 0/*origin*/ );
   4570 }
   4571 
   4572 VG_REGPARM(1)
   4573 void MC_(helperc_value_checkN_fail_no_o) ( HWord sz ) {
   4574    MC_(record_value_error) ( VG_(get_running_tid)(), (Int)sz, 0/*origin*/ );
   4575 }
   4576 
   4577 
   4578 /*------------------------------------------------------------*/
   4579 /*--- Metadata get/set functions, for client requests.     ---*/
   4580 /*------------------------------------------------------------*/
   4581 
   4582 // Nb: this expands the V+A bits out into register-form V bits, even though
   4583 // they're in memory.  This is for backward compatibility, and because it's
   4584 // probably what the user wants.
   4585 
   4586 /* Copy Vbits from/to address 'a'. Returns: 1 == OK, 2 == alignment
   4587    error [no longer used], 3 == addressing error. */
   4588 /* Nb: We used to issue various definedness/addressability errors from here,
   4589    but we took them out because they ranged from not-very-helpful to
   4590    downright annoying, and they complicated the error data structures. */
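         /* A minimal client-side sketch (assuming the VALGRIND_GET_VBITS
            request from memcheck.h, which is serviced by this function):
                unsigned char buf[8];          // only buf[0] initialised
                unsigned char vbits[8];
                buf[0] = 1;
                int res = VALGRIND_GET_VBITS(buf, vbits, 8);
            On success res == 1 and, in the register-form encoding mentioned
            above, vbits[0] == 0x00 (defined) while vbits[1..7] == 0xFF
            (undefined); res == 3 if buf or vbits is unaddressable. */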
   4591 static Int mc_get_or_set_vbits_for_client (
   4592    Addr a,
   4593    Addr vbits,
   4594    SizeT szB,
   4595    Bool setting, /* True <=> set vbits,  False <=> get vbits */
   4596    Bool is_client_request /* True <=> real user request
   4597                              False <=> internal call from gdbserver */
   4598 )
   4599 {
   4600    SizeT i;
   4601    Bool  ok;
   4602    UChar vbits8;
   4603 
    4604    /* Check that arrays are addressable before doing any getting/setting.
    4605       The vbits array is checked only for a real user request. */
   4606    for (i = 0; i < szB; i++) {
   4607       if (VA_BITS2_NOACCESS == get_vabits2(a + i) ||
   4608           (is_client_request && VA_BITS2_NOACCESS == get_vabits2(vbits + i))) {
   4609          return 3;
   4610       }
   4611    }
   4612 
   4613    /* Do the copy */
   4614    if (setting) {
   4615       /* setting */
   4616       for (i = 0; i < szB; i++) {
   4617          ok = set_vbits8(a + i, ((UChar*)vbits)[i]);
   4618          tl_assert(ok);
   4619       }
   4620    } else {
   4621       /* getting */
   4622       for (i = 0; i < szB; i++) {
   4623          ok = get_vbits8(a + i, &vbits8);
   4624          tl_assert(ok);
   4625          ((UChar*)vbits)[i] = vbits8;
   4626       }
   4627       if (is_client_request)
   4628         // The bytes in vbits[] have now been set, so mark them as such.
   4629         MC_(make_mem_defined)(vbits, szB);
   4630    }
   4631 
   4632    return 1;
   4633 }
   4634 
   4635 
   4636 /*------------------------------------------------------------*/
   4637 /*--- Detecting leaked (unreachable) malloc'd blocks.      ---*/
   4638 /*------------------------------------------------------------*/
   4639 
   4640 /* For the memory leak detector, say whether an entire 64k chunk of
   4641    address space is possibly in use, or not.  If in doubt return
   4642    True.
   4643 */
   4644 Bool MC_(is_within_valid_secondary) ( Addr a )
   4645 {
   4646    SecMap* sm = maybe_get_secmap_for ( a );
   4647    if (sm == NULL || sm == &sm_distinguished[SM_DIST_NOACCESS]) {
   4648       /* Definitely not in use. */
   4649       return False;
   4650    } else {
   4651       return True;
   4652    }
   4653 }
   4654 
   4655 
   4656 /* For the memory leak detector, say whether or not a given word
   4657    address is to be regarded as valid. */
   4658 Bool MC_(is_valid_aligned_word) ( Addr a )
   4659 {
   4660    tl_assert(sizeof(UWord) == 4 || sizeof(UWord) == 8);
   4661    tl_assert(VG_IS_WORD_ALIGNED(a));
   4662    if (get_vabits8_for_aligned_word32 (a) != VA_BITS8_DEFINED)
   4663       return False;
   4664    if (sizeof(UWord) == 8) {
   4665       if (get_vabits8_for_aligned_word32 (a + 4) != VA_BITS8_DEFINED)
   4666          return False;
   4667    }
   4668    if (UNLIKELY(MC_(in_ignored_range)(a)))
   4669       return False;
   4670    else
   4671       return True;
   4672 }
   4673 
   4674 
   4675 /*------------------------------------------------------------*/
   4676 /*--- Initialisation                                       ---*/
   4677 /*------------------------------------------------------------*/
   4678 
   4679 static void init_shadow_memory ( void )
   4680 {
   4681    Int     i;
   4682    SecMap* sm;
   4683 
   4684    tl_assert(V_BIT_UNDEFINED   == 1);
   4685    tl_assert(V_BIT_DEFINED     == 0);
   4686    tl_assert(V_BITS8_UNDEFINED == 0xFF);
   4687    tl_assert(V_BITS8_DEFINED   == 0);
   4688 
   4689    /* Build the 3 distinguished secondaries */
   4690    sm = &sm_distinguished[SM_DIST_NOACCESS];
   4691    for (i = 0; i < SM_CHUNKS; i++) sm->vabits8[i] = VA_BITS8_NOACCESS;
   4692 
   4693    sm = &sm_distinguished[SM_DIST_UNDEFINED];
   4694    for (i = 0; i < SM_CHUNKS; i++) sm->vabits8[i] = VA_BITS8_UNDEFINED;
   4695 
   4696    sm = &sm_distinguished[SM_DIST_DEFINED];
   4697    for (i = 0; i < SM_CHUNKS; i++) sm->vabits8[i] = VA_BITS8_DEFINED;
   4698 
   4699    /* Set up the primary map. */
   4700    /* These entries gradually get overwritten as the used address
   4701       space expands. */
   4702    for (i = 0; i < N_PRIMARY_MAP; i++)
   4703       primary_map[i] = &sm_distinguished[SM_DIST_NOACCESS];
   4704 
   4705    /* Auxiliary primary maps */
   4706    init_auxmap_L1_L2();
   4707 
   4708    /* auxmap_size = auxmap_used = 0;
   4709       no ... these are statically initialised */
   4710 
   4711    /* Secondary V bit table */
   4712    secVBitTable = createSecVBitTable();
   4713 }
   4714 
   4715 
   4716 /*------------------------------------------------------------*/
   4717 /*--- Sanity check machinery (permanently engaged)         ---*/
   4718 /*------------------------------------------------------------*/
   4719 
   4720 static Bool mc_cheap_sanity_check ( void )
   4721 {
   4722    n_sanity_cheap++;
   4723    PROF_EVENT(490, "cheap_sanity_check");
   4724    /* Check for sane operating level */
   4725    if (MC_(clo_mc_level) < 1 || MC_(clo_mc_level) > 3)
   4726       return False;
   4727    /* nothing else useful we can rapidly check */
   4728    return True;
   4729 }
   4730 
   4731 static Bool mc_expensive_sanity_check ( void )
   4732 {
   4733    Int     i;
   4734    Word    n_secmaps_found;
   4735    SecMap* sm;
   4736    HChar*  errmsg;
   4737    Bool    bad = False;
   4738 
   4739    if (0) VG_(printf)("expensive sanity check\n");
   4740    if (0) return True;
   4741 
   4742    n_sanity_expensive++;
   4743    PROF_EVENT(491, "expensive_sanity_check");
   4744 
   4745    /* Check for sane operating level */
   4746    if (MC_(clo_mc_level) < 1 || MC_(clo_mc_level) > 3)
   4747       return False;
   4748 
   4749    /* Check that the 3 distinguished SMs are still as they should be. */
   4750 
   4751    /* Check noaccess DSM. */
   4752    sm = &sm_distinguished[SM_DIST_NOACCESS];
   4753    for (i = 0; i < SM_CHUNKS; i++)
   4754       if (sm->vabits8[i] != VA_BITS8_NOACCESS)
   4755          bad = True;
   4756 
   4757    /* Check undefined DSM. */
   4758    sm = &sm_distinguished[SM_DIST_UNDEFINED];
   4759    for (i = 0; i < SM_CHUNKS; i++)
   4760       if (sm->vabits8[i] != VA_BITS8_UNDEFINED)
   4761          bad = True;
   4762 
   4763    /* Check defined DSM. */
   4764    sm = &sm_distinguished[SM_DIST_DEFINED];
   4765    for (i = 0; i < SM_CHUNKS; i++)
   4766       if (sm->vabits8[i] != VA_BITS8_DEFINED)
   4767          bad = True;
   4768 
   4769    if (bad) {
   4770       VG_(printf)("memcheck expensive sanity: "
   4771                   "distinguished_secondaries have changed\n");
   4772       return False;
   4773    }
   4774 
   4775    /* If we're not checking for undefined value errors, the secondary V bit
   4776     * table should be empty. */
   4777    if (MC_(clo_mc_level) == 1) {
   4778       if (0 != VG_(OSetGen_Size)(secVBitTable))
   4779          return False;
   4780    }
   4781 
   4782    /* check the auxiliary maps, very thoroughly */
   4783    n_secmaps_found = 0;
   4784    errmsg = check_auxmap_L1_L2_sanity( &n_secmaps_found );
   4785    if (errmsg) {
   4786       VG_(printf)("memcheck expensive sanity, auxmaps:\n\t%s", errmsg);
   4787       return False;
   4788    }
   4789 
   4790    /* n_secmaps_found is now the number referred to by the auxiliary
   4791       primary map.  Now add on the ones referred to by the main
   4792       primary map. */
   4793    for (i = 0; i < N_PRIMARY_MAP; i++) {
   4794       if (primary_map[i] == NULL) {
   4795          bad = True;
   4796       } else {
   4797          if (!is_distinguished_sm(primary_map[i]))
   4798             n_secmaps_found++;
   4799       }
   4800    }
   4801 
   4802    /* check that the number of secmaps issued matches the number that
   4803       are reachable (iow, no secmap leaks) */
   4804    if (n_secmaps_found != (n_issued_SMs - n_deissued_SMs))
   4805       bad = True;
   4806 
   4807    if (bad) {
   4808       VG_(printf)("memcheck expensive sanity: "
   4809                   "apparent secmap leakage\n");
   4810       return False;
   4811    }
   4812 
   4813    if (bad) {
   4814       VG_(printf)("memcheck expensive sanity: "
   4815                   "auxmap covers wrong address space\n");
   4816       return False;
   4817    }
   4818 
   4819    /* there is only one pointer to each secmap (expensive) */
   4820 
   4821    return True;
   4822 }
   4823 
   4824 /*------------------------------------------------------------*/
   4825 /*--- Command line args                                    ---*/
   4826 /*------------------------------------------------------------*/
   4827 
   4828 Bool          MC_(clo_partial_loads_ok)       = False;
   4829 Long          MC_(clo_freelist_vol)           = 20*1000*1000LL;
   4830 Long          MC_(clo_freelist_big_blocks)    =  1*1000*1000LL;
   4831 LeakCheckMode MC_(clo_leak_check)             = LC_Summary;
   4832 VgRes         MC_(clo_leak_resolution)        = Vg_HighRes;
   4833 Bool          MC_(clo_show_reachable)         = False;
   4834 Bool          MC_(clo_show_possibly_lost)     = True;
   4835 Bool          MC_(clo_workaround_gcc296_bugs) = False;
   4836 Int           MC_(clo_malloc_fill)            = -1;
   4837 Int           MC_(clo_free_fill)              = -1;
   4838 Int           MC_(clo_mc_level)               = 2;
   4839 
   4840 static Bool mc_process_cmd_line_options(Char* arg)
   4841 {
   4842    Char* tmp_str;
   4843 
   4844    tl_assert( MC_(clo_mc_level) >= 1 && MC_(clo_mc_level) <= 3 );
   4845 
   4846    /* Set MC_(clo_mc_level):
   4847          1 = A bit tracking only
   4848          2 = A and V bit tracking, but no V bit origins
   4849          3 = A and V bit tracking, and V bit origins
   4850 
   4851       Do this by inspecting --undef-value-errors= and
   4852       --track-origins=.  Reject the case --undef-value-errors=no
   4853       --track-origins=yes as meaningless.
   4854    */
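            /* In summary (illustrative; matching the checks just below):
                  (defaults)                                  -> level 2
                  --undef-value-errors=no                     -> level 1
                  --track-origins=yes                         -> level 3
                  --undef-value-errors=no --track-origins=yes -> rejected
                  (and likewise with the two flags swapped)   -> rejected */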
   4855    if (0 == VG_(strcmp)(arg, "--undef-value-errors=no")) {
   4856       if (MC_(clo_mc_level) == 3) {
   4857          goto bad_level;
   4858       } else {
   4859          MC_(clo_mc_level) = 1;
   4860          return True;
   4861       }
   4862    }
   4863    if (0 == VG_(strcmp)(arg, "--undef-value-errors=yes")) {
   4864       if (MC_(clo_mc_level) == 1)
   4865          MC_(clo_mc_level) = 2;
   4866       return True;
   4867    }
   4868    if (0 == VG_(strcmp)(arg, "--track-origins=no")) {
   4869       if (MC_(clo_mc_level) == 3)
   4870          MC_(clo_mc_level) = 2;
   4871       return True;
   4872    }
   4873    if (0 == VG_(strcmp)(arg, "--track-origins=yes")) {
   4874       if (MC_(clo_mc_level) == 1) {
   4875          goto bad_level;
   4876       } else {
   4877          MC_(clo_mc_level) = 3;
   4878          return True;
   4879       }
   4880    }
   4881 
    4882    if VG_BOOL_CLO(arg, "--partial-loads-ok", MC_(clo_partial_loads_ok)) {}
   4883    else if VG_BOOL_CLO(arg, "--show-reachable",   MC_(clo_show_reachable))   {}
   4884    else if VG_BOOL_CLO(arg, "--show-possibly-lost",
   4885                                             MC_(clo_show_possibly_lost))     {}
   4886    else if VG_BOOL_CLO(arg, "--workaround-gcc296-bugs",
   4887                                             MC_(clo_workaround_gcc296_bugs)) {}
   4888 
   4889    else if VG_BINT_CLO(arg, "--freelist-vol",  MC_(clo_freelist_vol),
   4890                                                0, 10*1000*1000*1000LL) {}
   4891 
   4892    else if VG_BINT_CLO(arg, "--freelist-big-blocks",
   4893                        MC_(clo_freelist_big_blocks),
   4894                        0, 10*1000*1000*1000LL) {}
   4895 
   4896    else if VG_XACT_CLO(arg, "--leak-check=no",
   4897                             MC_(clo_leak_check), LC_Off) {}
   4898    else if VG_XACT_CLO(arg, "--leak-check=summary",
   4899                             MC_(clo_leak_check), LC_Summary) {}
   4900    else if VG_XACT_CLO(arg, "--leak-check=yes",
   4901                             MC_(clo_leak_check), LC_Full) {}
   4902    else if VG_XACT_CLO(arg, "--leak-check=full",
   4903                             MC_(clo_leak_check), LC_Full) {}
   4904 
   4905    else if VG_XACT_CLO(arg, "--leak-resolution=low",
   4906                             MC_(clo_leak_resolution), Vg_LowRes) {}
   4907    else if VG_XACT_CLO(arg, "--leak-resolution=med",
   4908                             MC_(clo_leak_resolution), Vg_MedRes) {}
   4909    else if VG_XACT_CLO(arg, "--leak-resolution=high",
   4910                             MC_(clo_leak_resolution), Vg_HighRes) {}
   4911 
   4912    else if VG_STR_CLO(arg, "--ignore-ranges", tmp_str) {
   4913       Int  i;
   4914       Bool ok  = parse_ignore_ranges(tmp_str);
   4915       if (!ok)
   4916         return False;
   4917       tl_assert(ignoreRanges.used >= 0);
   4918       tl_assert(ignoreRanges.used < M_IGNORE_RANGES);
   4919       for (i = 0; i < ignoreRanges.used; i++) {
   4920          Addr s = ignoreRanges.start[i];
   4921          Addr e = ignoreRanges.end[i];
   4922          Addr limit = 0x4000000; /* 64M - entirely arbitrary limit */
   4923          if (e <= s) {
   4924             VG_(message)(Vg_DebugMsg,
   4925                "ERROR: --ignore-ranges: end <= start in range:\n");
   4926             VG_(message)(Vg_DebugMsg,
   4927                "       0x%lx-0x%lx\n", s, e);
   4928             return False;
   4929          }
   4930          if (e - s > limit) {
   4931             VG_(message)(Vg_DebugMsg,
   4932                "ERROR: --ignore-ranges: suspiciously large range:\n");
   4933             VG_(message)(Vg_DebugMsg,
   4934                "       0x%lx-0x%lx (size %ld)\n", s, e, (UWord)(e-s));
   4935             return False;
    4936          }
   4937       }
   4938    }
   4939 
   4940    else if VG_BHEX_CLO(arg, "--malloc-fill", MC_(clo_malloc_fill), 0x00,0xFF) {}
   4941    else if VG_BHEX_CLO(arg, "--free-fill",   MC_(clo_free_fill),   0x00,0xFF) {}
   4942 
   4943    else
   4944       return VG_(replacement_malloc_process_cmd_line_option)(arg);
   4945 
   4946    return True;
   4947 
   4948 
   4949   bad_level:
   4950    VG_(fmsg_bad_option)(arg,
   4951       "--track-origins=yes has no effect when --undef-value-errors=no.\n");
   4952 }
   4953 
   4954 static void mc_print_usage(void)
   4955 {
   4956    VG_(printf)(
   4957 "    --leak-check=no|summary|full     search for memory leaks at exit?  [summary]\n"
   4958 "    --leak-resolution=low|med|high   differentiation of leak stack traces [high]\n"
   4959 "    --show-reachable=no|yes          show reachable blocks in leak check? [no]\n"
   4960 "    --show-possibly-lost=no|yes      show possibly lost blocks in leak check?\n"
   4961 "                                     [yes]\n"
   4962 "    --undef-value-errors=no|yes      check for undefined value errors [yes]\n"
   4963 "    --track-origins=no|yes           show origins of undefined values? [no]\n"
   4964 "    --partial-loads-ok=no|yes        too hard to explain here; see manual [no]\n"
   4965 "    --freelist-vol=<number>          volume of freed blocks queue      [20000000]\n"
   4966 "    --freelist-big-blocks=<number>   releases first blocks with size >= [1000000]\n"
   4967 "    --workaround-gcc296-bugs=no|yes  self explanatory [no]\n"
   4968 "    --ignore-ranges=0xPP-0xQQ[,0xRR-0xSS]   assume given addresses are OK\n"
   4969 "    --malloc-fill=<hexnumber>        fill malloc'd areas with given value\n"
   4970 "    --free-fill=<hexnumber>          fill free'd areas with given value\n"
   4971    );
   4972 }
   4973 
   4974 static void mc_print_debug_usage(void)
   4975 {
   4976    VG_(printf)(
   4977 "    (none)\n"
   4978    );
   4979 }
   4980 
   4981 
   4982 /*------------------------------------------------------------*/
   4983 /*--- Client blocks                                        ---*/
   4984 /*------------------------------------------------------------*/
   4985 
   4986 /* Client block management:
   4987 
   4988    This is managed as an expanding array of client block descriptors.
   4989    Indices of live descriptors are issued to the client, so it can ask
   4990    to free them later.  Therefore we cannot slide live entries down
   4991    over dead ones.  Instead we must use free/inuse flags and scan for
   4992    an empty slot at allocation time.  This in turn means allocation is
   4993    relatively expensive, so we hope this does not happen too often.
   4994 
   4995    An unused block has start == size == 0
   4996 */
   4997 
   4998 /* type CGenBlock is defined in mc_include.h */
   4999 
   5000 /* This subsystem is self-initialising. */
   5001 static UWord      cgb_size = 0;
   5002 static UWord      cgb_used = 0;
   5003 static CGenBlock* cgbs     = NULL;
   5004 
   5005 /* Stats for this subsystem. */
   5006 static ULong cgb_used_MAX = 0;   /* Max in use. */
   5007 static ULong cgb_allocs   = 0;   /* Number of allocs. */
   5008 static ULong cgb_discards = 0;   /* Number of discards. */
   5009 static ULong cgb_search   = 0;   /* Number of searches. */
   5010 
   5011 
   5012 /* Get access to the client block array. */
   5013 void MC_(get_ClientBlock_array)( /*OUT*/CGenBlock** blocks,
   5014                                  /*OUT*/UWord* nBlocks )
   5015 {
   5016    *blocks  = cgbs;
   5017    *nBlocks = cgb_used;
   5018 }
   5019 
   5020 
   5021 static
   5022 Int alloc_client_block ( void )
   5023 {
   5024    UWord      i, sz_new;
   5025    CGenBlock* cgbs_new;
   5026 
   5027    cgb_allocs++;
   5028 
   5029    for (i = 0; i < cgb_used; i++) {
   5030       cgb_search++;
   5031       if (cgbs[i].start == 0 && cgbs[i].size == 0)
   5032          return i;
   5033    }
   5034 
   5035    /* Not found.  Try to allocate one at the end. */
   5036    if (cgb_used < cgb_size) {
   5037       cgb_used++;
   5038       return cgb_used-1;
   5039    }
   5040 
   5041    /* Ok, we have to allocate a new one. */
   5042    tl_assert(cgb_used == cgb_size);
   5043    sz_new = (cgbs == NULL) ? 10 : (2 * cgb_size);
   5044 
   5045    cgbs_new = VG_(malloc)( "mc.acb.1", sz_new * sizeof(CGenBlock) );
   5046    for (i = 0; i < cgb_used; i++)
   5047       cgbs_new[i] = cgbs[i];
   5048 
   5049    if (cgbs != NULL)
   5050       VG_(free)( cgbs );
   5051    cgbs = cgbs_new;
   5052 
   5053    cgb_size = sz_new;
   5054    cgb_used++;
   5055    if (cgb_used > cgb_used_MAX)
   5056       cgb_used_MAX = cgb_used;
   5057    return cgb_used-1;
   5058 }
   5059 
   5060 
   5061 static void show_client_block_stats ( void )
   5062 {
   5063    VG_(message)(Vg_DebugMsg,
   5064       "general CBs: %llu allocs, %llu discards, %llu maxinuse, %llu search\n",
   5065       cgb_allocs, cgb_discards, cgb_used_MAX, cgb_search
   5066    );
   5067 }
   5068 static void print_monitor_help ( void )
   5069 {
   5070    VG_(gdb_printf)
   5071       (
   5072 "\n"
   5073 "memcheck monitor commands:\n"
   5074 "  get_vbits <addr> [<len>]\n"
   5075 "        returns validity bits for <len> (or 1) bytes at <addr>\n"
   5076 "            bit values 0 = valid, 1 = invalid, __ = unaddressable byte\n"
   5077 "        Example: get_vbits 0x8049c78 10\n"
   5078 "  make_memory [noaccess|undefined\n"
   5079 "                     |defined|Definedifaddressable] <addr> [<len>]\n"
   5080 "        mark <len> (or 1) bytes at <addr> with the given accessibility\n"
   5081 "  check_memory [addressable|defined] <addr> [<len>]\n"
   5082 "        check that <len> (or 1) bytes at <addr> have the given accessibility\n"
   5083 "            and outputs a description of <addr>\n"
   5084 "  leak_check [full*|summary] [reachable|possibleleak*|definiteleak]\n"
   5085 "                [increased*|changed|any]\n"
   5086 "                [unlimited*|limited <max_loss_records_output>]\n"
   5087 "            * = defaults\n"
   5088 "        Examples: leak_check\n"
   5089 "                  leak_check summary any\n"
   5090 "                  leak_check full reachable any limited 100\n"
   5091 "  block_list <loss_record_nr>\n"
   5092 "        after a leak search, shows the list of blocks of <loss_record_nr>\n"
   5093 "  who_points_at <addr> [<len>]\n"
   5094 "        shows places pointing inside <len> (default 1) bytes at <addr>\n"
   5095 "        (with len 1, only shows \"start pointers\" pointing exactly to <addr>,\n"
   5096 "         with len > 1, will also show \"interior pointers\")\n"
   5097 "\n");
   5098 }
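         
         /* Illustrative use of the monitor commands above (assuming the program
            runs under valgrind with the embedded gdbserver enabled and gdb is
            connected through vgdb; the example commands come from the help text):
         
               (gdb) monitor get_vbits 0x8049c78 10
               (gdb) monitor make_memory defined 0x8049c78 10
               (gdb) monitor leak_check full reachable any limited 100
         
            The same commands can also be sent from a shell, e.g.
            "vgdb leak_check summary". */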
   5099 
   5100 /* return True if request recognised, False otherwise */
   5101 static Bool handle_gdb_monitor_command (ThreadId tid, Char *req)
   5102 {
   5103    Char* wcmd;
    5104    Char s[VG_(strlen(req)) + 1]; /* copy for strtok_r; +1 for the trailing NUL */
   5105    Char *ssaveptr;
   5106 
   5107    VG_(strcpy) (s, req);
   5108 
   5109    wcmd = VG_(strtok_r) (s, " ", &ssaveptr);
   5110    /* NB: if possible, avoid introducing a new command below which
   5111       starts with the same first letter(s) as an already existing
   5112       command. This ensures a shorter abbreviation for the user. */
   5113    switch (VG_(keyword_id)
   5114            ("help get_vbits leak_check make_memory check_memory "
   5115             "block_list who_points_at",
   5116             wcmd, kwd_report_duplicated_matches)) {
   5117    case -2: /* multiple matches */
   5118       return True;
   5119    case -1: /* not found */
   5120       return False;
   5121    case  0: /* help */
   5122       print_monitor_help();
   5123       return True;
   5124    case  1: { /* get_vbits */
   5125       Addr address;
   5126       SizeT szB = 1;
   5127       VG_(strtok_get_address_and_size) (&address, &szB, &ssaveptr);
   5128       if (szB != 0) {
   5129          UChar vbits;
   5130          Int i;
   5131          Int unaddressable = 0;
   5132          for (i = 0; i < szB; i++) {
   5133             Int res = mc_get_or_set_vbits_for_client
   5134                (address+i, (Addr) &vbits, 1,
   5135                 False, /* get them */
   5136                 False  /* is client request */ );
    5137             /* at the start of each new line of output (every 32 bytes), print a \n first. */
   5138             if ((i % 32) == 0 && i != 0)
   5139                VG_(gdb_printf) ("\n");
    5140             /* at the start of each new group of 4 bytes, print a separating space. */
   5141             else if ((i % 4) == 0 && i != 0)
   5142                VG_(gdb_printf) (" ");
   5143             if (res == 1) {
   5144                VG_(gdb_printf) ("%02x", vbits);
   5145             } else {
   5146                tl_assert(3 == res);
   5147                unaddressable++;
   5148                VG_(gdb_printf) ("__");
   5149             }
   5150          }
   5151          VG_(gdb_printf) ("\n");
   5152          if (unaddressable) {
   5153             VG_(gdb_printf)
   5154                ("Address %p len %ld has %d bytes unaddressable\n",
   5155                 (void *)address, szB, unaddressable);
   5156          }
   5157       }
   5158       return True;
   5159    }
   5160    case  2: { /* leak_check */
   5161       Int err = 0;
   5162       LeakCheckParams lcp;
   5163       Char* kw;
   5164 
   5165       lcp.mode               = LC_Full;
   5166       lcp.show_reachable     = False;
   5167       lcp.show_possibly_lost = True;
   5168       lcp.deltamode          = LCD_Increased;
   5169       lcp.max_loss_records_output = 999999999;
   5170       lcp.requested_by_monitor_command = True;
   5171 
   5172       for (kw = VG_(strtok_r) (NULL, " ", &ssaveptr);
   5173            kw != NULL;
   5174            kw = VG_(strtok_r) (NULL, " ", &ssaveptr)) {
   5175          switch (VG_(keyword_id)
   5176                  ("full summary "
   5177                   "reachable possibleleak definiteleak "
   5178                   "increased changed any "
   5179                   "unlimited limited ",
   5180                   kw, kwd_report_all)) {
   5181          case -2: err++; break;
   5182          case -1: err++; break;
   5183          case  0: /* full */
   5184             lcp.mode = LC_Full; break;
   5185          case  1: /* summary */
   5186             lcp.mode = LC_Summary; break;
   5187          case  2: /* reachable */
   5188             lcp.show_reachable = True;
   5189             lcp.show_possibly_lost = True; break;
   5190          case  3: /* possibleleak */
   5191             lcp.show_reachable = False;
   5192             lcp.show_possibly_lost = True; break;
   5193          case  4: /* definiteleak */
   5194             lcp.show_reachable = False;
   5195             lcp.show_possibly_lost = False; break;
   5196          case  5: /* increased */
   5197             lcp.deltamode = LCD_Increased; break;
   5198          case  6: /* changed */
   5199             lcp.deltamode = LCD_Changed; break;
   5200          case  7: /* any */
   5201             lcp.deltamode = LCD_Any; break;
   5202          case  8: /* unlimited */
   5203             lcp.max_loss_records_output = 999999999; break;
   5204          case  9: { /* limited */
   5205             int int_value;
   5206             char* endptr;
   5207 
   5208             wcmd = VG_(strtok_r) (NULL, " ", &ssaveptr);
   5209             if (wcmd == NULL) {
   5210                int_value = 0;
   5211                endptr = "empty"; /* to report an error below */
   5212             } else {
   5213                int_value = VG_(strtoll10) (wcmd, (Char **)&endptr);
   5214             }
   5215             if (*endptr != '\0')
   5216                VG_(gdb_printf) ("missing or malformed integer value\n");
   5217             else if (int_value > 0)
   5218                lcp.max_loss_records_output = (UInt) int_value;
   5219             else
   5220                VG_(gdb_printf) ("max_loss_records_output must be >= 1, got %d\n",
   5221                                 int_value);
   5222             break;
   5223          }
   5224          default:
   5225             tl_assert (0);
   5226          }
   5227       }
   5228       if (!err)
   5229          MC_(detect_memory_leaks)(tid, &lcp);
   5230       return True;
   5231    }
   5232 
   5233    case  3: { /* make_memory */
   5234       Addr address;
   5235       SizeT szB = 1;
   5236       int kwdid = VG_(keyword_id)
   5237          ("noaccess undefined defined Definedifaddressable",
   5238           VG_(strtok_r) (NULL, " ", &ssaveptr), kwd_report_all);
   5239       VG_(strtok_get_address_and_size) (&address, &szB, &ssaveptr);
   5240       if (address == (Addr) 0 && szB == 0) return True;
   5241       switch (kwdid) {
   5242       case -2: break;
   5243       case -1: break;
   5244       case  0: MC_(make_mem_noaccess) (address, szB); break;
   5245       case  1: make_mem_undefined_w_tid_and_okind ( address, szB, tid,
   5246                                                     MC_OKIND_USER ); break;
   5247       case  2: MC_(make_mem_defined) ( address, szB ); break;
    5248       case  3: make_mem_defined_if_addressable ( address, szB ); break;
   5249       default: tl_assert(0);
   5250       }
   5251       return True;
   5252    }
   5253 
   5254    case  4: { /* check_memory */
   5255       Addr address;
   5256       SizeT szB = 1;
   5257       Addr bad_addr;
   5258       UInt okind;
   5259       char* src;
   5260       UInt otag;
   5261       UInt ecu;
   5262       ExeContext* origin_ec;
   5263       MC_ReadResult res;
   5264 
   5265       int kwdid = VG_(keyword_id)
   5266          ("addressable defined",
   5267           VG_(strtok_r) (NULL, " ", &ssaveptr), kwd_report_all);
   5268       VG_(strtok_get_address_and_size) (&address, &szB, &ssaveptr);
   5269       if (address == (Addr) 0 && szB == 0) return True;
   5270       switch (kwdid) {
   5271       case -2: break;
   5272       case -1: break;
   5273       case  0:
   5274          if (is_mem_addressable ( address, szB, &bad_addr ))
   5275             VG_(gdb_printf) ("Address %p len %ld addressable\n",
   5276                              (void *)address, szB);
   5277          else
   5278             VG_(gdb_printf)
   5279                ("Address %p len %ld not addressable:\nbad address %p\n",
   5280                 (void *)address, szB, (void *) bad_addr);
   5281          MC_(pp_describe_addr) (address);
   5282          break;
   5283       case  1: res = is_mem_defined ( address, szB, &bad_addr, &otag );
   5284          if (MC_AddrErr == res)
   5285             VG_(gdb_printf)
   5286                ("Address %p len %ld not addressable:\nbad address %p\n",
   5287                 (void *)address, szB, (void *) bad_addr);
   5288          else if (MC_ValueErr == res) {
   5289             okind = otag & 3;
   5290             switch (okind) {
   5291             case MC_OKIND_STACK:
   5292                src = " was created by a stack allocation"; break;
   5293             case MC_OKIND_HEAP:
   5294                src = " was created by a heap allocation"; break;
   5295             case MC_OKIND_USER:
   5296                src = " was created by a client request"; break;
   5297             case MC_OKIND_UNKNOWN:
   5298                src = ""; break;
   5299             default: tl_assert(0);
   5300             }
   5301             VG_(gdb_printf)
   5302                ("Address %p len %ld not defined:\n"
   5303                 "Uninitialised value at %p%s\n",
   5304                 (void *)address, szB, (void *) bad_addr, src);
   5305             ecu = otag & ~3;
   5306             if (VG_(is_plausible_ECU)(ecu)) {
   5307                origin_ec = VG_(get_ExeContext_from_ECU)( ecu );
   5308                VG_(pp_ExeContext)( origin_ec );
   5309             }
   5310          }
   5311          else
   5312             VG_(gdb_printf) ("Address %p len %ld defined\n",
   5313                              (void *)address, szB);
   5314          MC_(pp_describe_addr) (address);
   5315          break;
   5316       default: tl_assert(0);
   5317       }
   5318       return True;
   5319    }
   5320 
   5321    case  5: { /* block_list */
   5322       Char* wl;
   5323       Char *endptr;
   5324       UInt lr_nr = 0;
    5325       wl = VG_(strtok_r) (NULL, " ", &ssaveptr);
    5326       if (wl != NULL) lr_nr = VG_(strtoull10) (wl, &endptr);
    5327       if (wl == NULL || *endptr != '\0') {
    5328          VG_(gdb_printf) ("missing or malformed integer\n");
   5329       } else {
    5330          // lr_nr-1 because the number shown to the user is one more than the index into lr_array.
   5331          if (lr_nr == 0 || ! MC_(print_block_list) (lr_nr-1))
   5332             VG_(gdb_printf) ("invalid loss record nr\n");
   5333       }
   5334       return True;
   5335    }
   5336 
   5337    case  6: { /* who_points_at */
   5338       Addr address;
   5339       SizeT szB = 1;
   5340 
   5341       VG_(strtok_get_address_and_size) (&address, &szB, &ssaveptr);
   5342       if (address == (Addr) 0) {
   5343          VG_(gdb_printf) ("Cannot search who points at 0x0\n");
   5344          return True;
   5345       }
   5346       MC_(who_points_at) (address, szB);
   5347       return True;
   5348    }
   5349 
   5350    default:
   5351       tl_assert(0);
   5352       return False;
   5353    }
   5354 }
   5355 
   5356 /*------------------------------------------------------------*/
   5357 /*--- Client requests                                      ---*/
   5358 /*------------------------------------------------------------*/
   5359 
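         /* Illustrative client-side sketch (hypothetical custom allocator, not
            part of this file): the MALLOCLIKE / FREELIKE requests handled below
            are issued from client code via the macros in valgrind.h, e.g.:
         
               #include "valgrind.h"
               void* my_alloc(size_t n) {
                  void* p = grab_from_my_pool(n);            // hypothetical helper
                  VALGRIND_MALLOCLIKE_BLOCK(p, n, 0, 0);     // rzB = 0, is_zeroed = 0
                  return p;
               }
               void my_free(void* p) {
                  VALGRIND_FREELIKE_BLOCK(p, 0);             // rzB = 0
                  return_to_my_pool(p);                      // hypothetical helper
               }
         
            With rzB > 0 the MALLOCLIKE handler below additionally marks rzB bytes
            on each side of the block as noaccess (red zones). */
         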
   5360 static Bool mc_handle_client_request ( ThreadId tid, UWord* arg, UWord* ret )
   5361 {
   5362    Int   i;
   5363    Bool  ok;
   5364    Addr  bad_addr;
   5365 
   5366    if (!VG_IS_TOOL_USERREQ('M','C',arg[0])
   5367        && VG_USERREQ__MALLOCLIKE_BLOCK != arg[0]
   5368        && VG_USERREQ__RESIZEINPLACE_BLOCK != arg[0]
   5369        && VG_USERREQ__FREELIKE_BLOCK   != arg[0]
   5370        && VG_USERREQ__CREATE_MEMPOOL   != arg[0]
   5371        && VG_USERREQ__DESTROY_MEMPOOL  != arg[0]
   5372        && VG_USERREQ__MEMPOOL_ALLOC    != arg[0]
   5373        && VG_USERREQ__MEMPOOL_FREE     != arg[0]
   5374        && VG_USERREQ__MEMPOOL_TRIM     != arg[0]
   5375        && VG_USERREQ__MOVE_MEMPOOL     != arg[0]
   5376        && VG_USERREQ__MEMPOOL_CHANGE   != arg[0]
   5377        && VG_USERREQ__MEMPOOL_EXISTS   != arg[0]
   5378        && VG_USERREQ__GDB_MONITOR_COMMAND   != arg[0])
   5379       return False;
   5380 
   5381    switch (arg[0]) {
   5382       case VG_USERREQ__CHECK_MEM_IS_ADDRESSABLE:
   5383          ok = is_mem_addressable ( arg[1], arg[2], &bad_addr );
   5384          if (!ok)
   5385             MC_(record_user_error) ( tid, bad_addr, /*isAddrErr*/True, 0 );
   5386          *ret = ok ? (UWord)NULL : bad_addr;
   5387          break;
   5388 
   5389       case VG_USERREQ__CHECK_MEM_IS_DEFINED: {
   5390          Bool errorV    = False;
   5391          Addr bad_addrV = 0;
   5392          UInt otagV     = 0;
   5393          Bool errorA    = False;
   5394          Addr bad_addrA = 0;
   5395          is_mem_defined_comprehensive(
   5396             arg[1], arg[2],
   5397             &errorV, &bad_addrV, &otagV, &errorA, &bad_addrA
   5398          );
   5399          if (errorV) {
   5400             MC_(record_user_error) ( tid, bad_addrV,
   5401                                      /*isAddrErr*/False, otagV );
   5402          }
   5403          if (errorA) {
   5404             MC_(record_user_error) ( tid, bad_addrA,
   5405                                      /*isAddrErr*/True, 0 );
   5406          }
   5407          /* Return the lower of the two erring addresses, if any. */
   5408          *ret = 0;
   5409          if (errorV && !errorA) {
   5410             *ret = bad_addrV;
   5411          }
   5412          if (!errorV && errorA) {
   5413             *ret = bad_addrA;
   5414          }
   5415          if (errorV && errorA) {
   5416             *ret = bad_addrV < bad_addrA ? bad_addrV : bad_addrA;
   5417          }
   5418          break;
   5419       }
   5420 
   5421       case VG_USERREQ__DO_LEAK_CHECK: {
   5422          LeakCheckParams lcp;
   5423 
   5424          if (arg[1] == 0)
   5425             lcp.mode = LC_Full;
   5426          else if (arg[1] == 1)
   5427             lcp.mode = LC_Summary;
   5428          else {
   5429             VG_(message)(Vg_UserMsg,
   5430                          "Warning: unknown memcheck leak search mode\n");
   5431             lcp.mode = LC_Full;
   5432          }
   5433 
   5434          lcp.show_reachable = MC_(clo_show_reachable);
   5435          lcp.show_possibly_lost = MC_(clo_show_possibly_lost);
   5436 
   5437          if (arg[2] == 0)
   5438             lcp.deltamode = LCD_Any;
   5439          else if (arg[2] == 1)
   5440             lcp.deltamode = LCD_Increased;
   5441          else if (arg[2] == 2)
   5442             lcp.deltamode = LCD_Changed;
   5443          else {
   5444             VG_(message)
   5445                (Vg_UserMsg,
   5446                 "Warning: unknown memcheck leak search deltamode\n");
   5447             lcp.deltamode = LCD_Any;
   5448          }
   5449          lcp.max_loss_records_output = 999999999;
   5450          lcp.requested_by_monitor_command = False;
   5451 
   5452          MC_(detect_memory_leaks)(tid, &lcp);
   5453          *ret = 0; /* return value is meaningless */
   5454          break;
   5455       }
   5456 
   5457       case VG_USERREQ__MAKE_MEM_NOACCESS:
   5458          MC_(make_mem_noaccess) ( arg[1], arg[2] );
   5459          *ret = -1;
   5460          break;
   5461 
   5462       case VG_USERREQ__MAKE_MEM_UNDEFINED:
   5463          make_mem_undefined_w_tid_and_okind ( arg[1], arg[2], tid,
   5464                                               MC_OKIND_USER );
   5465          *ret = -1;
   5466          break;
   5467 
   5468       case VG_USERREQ__MAKE_MEM_DEFINED:
   5469          MC_(make_mem_defined) ( arg[1], arg[2] );
   5470          *ret = -1;
   5471          break;
   5472 
   5473       case VG_USERREQ__MAKE_MEM_DEFINED_IF_ADDRESSABLE:
   5474          make_mem_defined_if_addressable ( arg[1], arg[2] );
   5475          *ret = -1;
   5476          break;
   5477 
   5478       case VG_USERREQ__CREATE_BLOCK: /* describe a block */
   5479          if (arg[1] != 0 && arg[2] != 0) {
   5480             i = alloc_client_block();
   5481             /* VG_(printf)("allocated %d %p\n", i, cgbs); */
   5482             cgbs[i].start = arg[1];
   5483             cgbs[i].size  = arg[2];
   5484             cgbs[i].desc  = VG_(strdup)("mc.mhcr.1", (Char *)arg[3]);
   5485             cgbs[i].where = VG_(record_ExeContext) ( tid, 0/*first_ip_delta*/ );
   5486             *ret = i;
   5487          } else
   5488             *ret = -1;
   5489          break;
   5490 
   5491       case VG_USERREQ__DISCARD: /* discard */
   5492          if (cgbs == NULL
   5493              || arg[2] >= cgb_used ||
   5494              (cgbs[arg[2]].start == 0 && cgbs[arg[2]].size == 0)) {
   5495             *ret = 1;
   5496          } else {
   5497             tl_assert(arg[2] >= 0 && arg[2] < cgb_used);
   5498             cgbs[arg[2]].start = cgbs[arg[2]].size = 0;
   5499             VG_(free)(cgbs[arg[2]].desc);
   5500             cgb_discards++;
   5501             *ret = 0;
   5502          }
   5503          break;
   5504 
   5505       case VG_USERREQ__GET_VBITS:
   5506          *ret = mc_get_or_set_vbits_for_client
   5507                    ( arg[1], arg[2], arg[3],
   5508                      False /* get them */,
   5509                      True /* is client request */ );
   5510          break;
   5511 
   5512       case VG_USERREQ__SET_VBITS:
   5513          *ret = mc_get_or_set_vbits_for_client
   5514                    ( arg[1], arg[2], arg[3],
   5515                      True /* set them */,
   5516                      True /* is client request */ );
   5517          break;
   5518 
   5519       case VG_USERREQ__COUNT_LEAKS: { /* count leaked bytes */
   5520          UWord** argp = (UWord**)arg;
   5521          // MC_(bytes_leaked) et al were set by the last leak check (or zero
   5522          // if no prior leak checks performed).
   5523          *argp[1] = MC_(bytes_leaked) + MC_(bytes_indirect);
   5524          *argp[2] = MC_(bytes_dubious);
   5525          *argp[3] = MC_(bytes_reachable);
   5526          *argp[4] = MC_(bytes_suppressed);
   5527          // there is no argp[5]
   5528          //*argp[5] = MC_(bytes_indirect);
   5529          // XXX need to make *argp[1-4] defined;  currently done in the
   5530          // VALGRIND_COUNT_LEAKS_MACRO by initialising them to zero.
   5531          *ret = 0;
   5532          return True;
   5533       }
   5534       case VG_USERREQ__COUNT_LEAK_BLOCKS: { /* count leaked blocks */
   5535          UWord** argp = (UWord**)arg;
   5536          // MC_(blocks_leaked) et al were set by the last leak check (or zero
   5537          // if no prior leak checks performed).
   5538          *argp[1] = MC_(blocks_leaked) + MC_(blocks_indirect);
   5539          *argp[2] = MC_(blocks_dubious);
   5540          *argp[3] = MC_(blocks_reachable);
   5541          *argp[4] = MC_(blocks_suppressed);
   5542          // there is no argp[5]
   5543          //*argp[5] = MC_(blocks_indirect);
   5544          // XXX need to make *argp[1-4] defined;  currently done in the
   5545          // VALGRIND_COUNT_LEAK_BLOCKS_MACRO by initialising them to zero.
   5546          *ret = 0;
   5547          return True;
   5548       }
   5549       case VG_USERREQ__MALLOCLIKE_BLOCK: {
   5550          Addr p         = (Addr)arg[1];
   5551          SizeT sizeB    =       arg[2];
   5552          UInt rzB       =       arg[3];
   5553          Bool is_zeroed = (Bool)arg[4];
   5554 
   5555          MC_(new_block) ( tid, p, sizeB, /*ignored*/0, is_zeroed,
   5556                           MC_AllocCustom, MC_(malloc_list) );
   5557          if (rzB > 0) {
   5558             MC_(make_mem_noaccess) ( p - rzB, rzB);
   5559             MC_(make_mem_noaccess) ( p + sizeB, rzB);
   5560          }
   5561          return True;
   5562       }
   5563       case VG_USERREQ__RESIZEINPLACE_BLOCK: {
   5564          Addr p         = (Addr)arg[1];
   5565          SizeT oldSizeB =       arg[2];
   5566          SizeT newSizeB =       arg[3];
   5567          UInt rzB       =       arg[4];
   5568 
   5569          MC_(handle_resizeInPlace) ( tid, p, oldSizeB, newSizeB, rzB );
   5570          return True;
   5571       }
   5572       case VG_USERREQ__FREELIKE_BLOCK: {
   5573          Addr p         = (Addr)arg[1];
   5574          UInt rzB       =       arg[2];
   5575 
   5576          MC_(handle_free) ( tid, p, rzB, MC_AllocCustom );
   5577          return True;
   5578       }
   5579 
   5580       case _VG_USERREQ__MEMCHECK_RECORD_OVERLAP_ERROR: {
   5581          Char* s   = (Char*)arg[1];
   5582          Addr  dst = (Addr) arg[2];
   5583          Addr  src = (Addr) arg[3];
   5584          SizeT len = (SizeT)arg[4];
   5585          MC_(record_overlap_error)(tid, s, src, dst, len);
   5586          return True;
   5587       }
   5588 
   5589       case VG_USERREQ__CREATE_MEMPOOL: {
   5590          Addr pool      = (Addr)arg[1];
   5591          UInt rzB       =       arg[2];
   5592          Bool is_zeroed = (Bool)arg[3];
   5593 
   5594          MC_(create_mempool) ( pool, rzB, is_zeroed );
   5595          return True;
   5596       }
   5597 
   5598       case VG_USERREQ__DESTROY_MEMPOOL: {
   5599          Addr pool      = (Addr)arg[1];
   5600 
   5601          MC_(destroy_mempool) ( pool );
   5602          return True;
   5603       }
   5604 
   5605       case VG_USERREQ__MEMPOOL_ALLOC: {
   5606          Addr pool      = (Addr)arg[1];
   5607          Addr addr      = (Addr)arg[2];
   5608          UInt size      =       arg[3];
   5609 
   5610          MC_(mempool_alloc) ( tid, pool, addr, size );
   5611          return True;
   5612       }
   5613 
   5614       case VG_USERREQ__MEMPOOL_FREE: {
   5615          Addr pool      = (Addr)arg[1];
   5616          Addr addr      = (Addr)arg[2];
   5617 
   5618          MC_(mempool_free) ( pool, addr );
   5619          return True;
   5620       }
   5621 
   5622       case VG_USERREQ__MEMPOOL_TRIM: {
   5623          Addr pool      = (Addr)arg[1];
   5624          Addr addr      = (Addr)arg[2];
   5625          UInt size      =       arg[3];
   5626 
   5627          MC_(mempool_trim) ( pool, addr, size );
   5628          return True;
   5629       }
   5630 
   5631       case VG_USERREQ__MOVE_MEMPOOL: {
   5632          Addr poolA     = (Addr)arg[1];
   5633          Addr poolB     = (Addr)arg[2];
   5634 
   5635          MC_(move_mempool) ( poolA, poolB );
   5636          return True;
   5637       }
   5638 
   5639       case VG_USERREQ__MEMPOOL_CHANGE: {
   5640          Addr pool      = (Addr)arg[1];
   5641          Addr addrA     = (Addr)arg[2];
   5642          Addr addrB     = (Addr)arg[3];
   5643          UInt size      =       arg[4];
   5644 
   5645          MC_(mempool_change) ( pool, addrA, addrB, size );
   5646          return True;
   5647       }
   5648 
   5649       case VG_USERREQ__MEMPOOL_EXISTS: {
   5650          Addr pool      = (Addr)arg[1];
   5651 
   5652          *ret = (UWord) MC_(mempool_exists) ( pool );
    5653          return True;
   5654       }
   5655 
   5656       case VG_USERREQ__GDB_MONITOR_COMMAND: {
   5657          Bool handled = handle_gdb_monitor_command (tid, (Char*)arg[1]);
   5658          if (handled)
   5659             *ret = 1;
   5660          else
   5661             *ret = 0;
   5662          return handled;
   5663       }
   5664 
   5665       default:
   5666          VG_(message)(
   5667             Vg_UserMsg,
   5668             "Warning: unknown memcheck client request code %llx\n",
   5669             (ULong)arg[0]
   5670          );
   5671          return False;
   5672    }
   5673    return True;
   5674 }
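         
         /* Illustrative client-side sketch for the mempool requests above
            (hypothetical pool allocator; the macro names are from valgrind.h):
         
               #include "valgrind.h"
               typedef struct { char* base; size_t used; } Pool;   // hypothetical
         
               void pool_init(Pool* pl) {
                  VALGRIND_CREATE_MEMPOOL(pl, 0, 0);   // rzB = 0, is_zeroed = 0
               }
               void* pool_alloc(Pool* pl, size_t n) {
                  void* p = pl->base + pl->used;  pl->used += n;
                  VALGRIND_MEMPOOL_ALLOC(pl, p, n);    // register chunk [p, p+n)
                  return p;
               }
               void pool_free(Pool* pl, void* p) {
                  VALGRIND_MEMPOOL_FREE(pl, p);        // chunk becomes noaccess
               }
               void pool_done(Pool* pl) {
                  VALGRIND_DESTROY_MEMPOOL(pl);        // forget the whole pool
               }
         */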
   5675 
   5676 
   5677 /*------------------------------------------------------------*/
   5678 /*--- Crude profiling machinery.                           ---*/
   5679 /*------------------------------------------------------------*/
   5680 
   5681 // We track a number of interesting events (using PROF_EVENT)
   5682 // if MC_PROFILE_MEMORY is defined.
   5683 
   5684 #ifdef MC_PROFILE_MEMORY
   5685 
   5686 UInt   MC_(event_ctr)[N_PROF_EVENTS];
   5687 HChar* MC_(event_ctr_name)[N_PROF_EVENTS];
   5688 
   5689 static void init_prof_mem ( void )
   5690 {
   5691    Int i;
   5692    for (i = 0; i < N_PROF_EVENTS; i++) {
   5693       MC_(event_ctr)[i] = 0;
   5694       MC_(event_ctr_name)[i] = NULL;
   5695    }
   5696 }
   5697 
   5698 static void done_prof_mem ( void )
   5699 {
   5700    Int  i;
   5701    Bool spaced = False;
   5702    for (i = 0; i < N_PROF_EVENTS; i++) {
   5703       if (!spaced && (i % 10) == 0) {
   5704          VG_(printf)("\n");
   5705          spaced = True;
   5706       }
   5707       if (MC_(event_ctr)[i] > 0) {
   5708          spaced = False;
   5709          VG_(printf)( "prof mem event %3d: %9d   %s\n",
   5710                       i, MC_(event_ctr)[i],
   5711                       MC_(event_ctr_name)[i]
   5712                          ? MC_(event_ctr_name)[i] : "unnamed");
   5713       }
   5714    }
   5715 }
   5716 
   5717 #else
   5718 
   5719 static void init_prof_mem ( void ) { }
   5720 static void done_prof_mem ( void ) { }
   5721 
   5722 #endif
   5723 
   5724 
   5725 /*------------------------------------------------------------*/
   5726 /*--- Origin tracking stuff                                ---*/
   5727 /*------------------------------------------------------------*/
   5728 
   5729 /*--------------------------------------------*/
   5730 /*--- Origin tracking: load handlers       ---*/
   5731 /*--------------------------------------------*/
   5732 
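         /* Notes inferred from the handlers below: each OCacheLine tracks origins
            at 32-bit granularity.  w32[lineoff] holds the origin tag (otag) for an
            aligned 32-bit word, and descr[lineoff] is a 4-bit mask (hence the
            "descr < 0x10" assertions) with one bit per byte, set iff that byte's
            origin is described by w32[lineoff].  merge_origins simply takes the
            numerically larger of two otags, so a zero ("no origin") tag never
            wins over a valid one. */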
   5733 static INLINE UInt merge_origins ( UInt or1, UInt or2 ) {
   5734    return or1 > or2 ? or1 : or2;
   5735 }
   5736 
   5737 UWord VG_REGPARM(1) MC_(helperc_b_load1)( Addr a ) {
   5738    OCacheLine* line;
   5739    UChar descr;
   5740    UWord lineoff = oc_line_offset(a);
   5741    UWord byteoff = a & 3; /* 0, 1, 2 or 3 */
   5742 
   5743    if (OC_ENABLE_ASSERTIONS) {
   5744       tl_assert(lineoff >= 0 && lineoff < OC_W32S_PER_LINE);
   5745    }
   5746 
   5747    line = find_OCacheLine( a );
   5748 
   5749    descr = line->descr[lineoff];
   5750    if (OC_ENABLE_ASSERTIONS) {
   5751       tl_assert(descr < 0x10);
   5752    }
   5753 
   5754    if (LIKELY(0 == (descr & (1 << byteoff))))  {
   5755       return 0;
   5756    } else {
   5757       return line->w32[lineoff];
   5758    }
   5759 }
   5760 
   5761 UWord VG_REGPARM(1) MC_(helperc_b_load2)( Addr a ) {
   5762    OCacheLine* line;
   5763    UChar descr;
   5764    UWord lineoff, byteoff;
   5765 
   5766    if (UNLIKELY(a & 1)) {
   5767       /* Handle misaligned case, slowly. */
   5768       UInt oLo   = (UInt)MC_(helperc_b_load1)( a + 0 );
   5769       UInt oHi   = (UInt)MC_(helperc_b_load1)( a + 1 );
   5770       return merge_origins(oLo, oHi);
   5771    }
   5772 
   5773    lineoff = oc_line_offset(a);
   5774    byteoff = a & 3; /* 0 or 2 */
   5775 
   5776    if (OC_ENABLE_ASSERTIONS) {
   5777       tl_assert(lineoff >= 0 && lineoff < OC_W32S_PER_LINE);
   5778    }
   5779    line = find_OCacheLine( a );
   5780 
   5781    descr = line->descr[lineoff];
   5782    if (OC_ENABLE_ASSERTIONS) {
   5783       tl_assert(descr < 0x10);
   5784    }
   5785 
   5786    if (LIKELY(0 == (descr & (3 << byteoff)))) {
   5787       return 0;
   5788    } else {
   5789       return line->w32[lineoff];
   5790    }
   5791 }
   5792 
   5793 UWord VG_REGPARM(1) MC_(helperc_b_load4)( Addr a ) {
   5794    OCacheLine* line;
   5795    UChar descr;
   5796    UWord lineoff;
   5797 
   5798    if (UNLIKELY(a & 3)) {
   5799       /* Handle misaligned case, slowly. */
   5800       UInt oLo   = (UInt)MC_(helperc_b_load2)( a + 0 );
   5801       UInt oHi   = (UInt)MC_(helperc_b_load2)( a + 2 );
   5802       return merge_origins(oLo, oHi);
   5803    }
   5804 
   5805    lineoff = oc_line_offset(a);
   5806    if (OC_ENABLE_ASSERTIONS) {
   5807       tl_assert(lineoff >= 0 && lineoff < OC_W32S_PER_LINE);
   5808    }
   5809 
   5810    line = find_OCacheLine( a );
   5811 
   5812    descr = line->descr[lineoff];
   5813    if (OC_ENABLE_ASSERTIONS) {
   5814       tl_assert(descr < 0x10);
   5815    }
   5816 
   5817    if (LIKELY(0 == descr)) {
   5818       return 0;
   5819    } else {
   5820       return line->w32[lineoff];
   5821    }
   5822 }
   5823 
   5824 UWord VG_REGPARM(1) MC_(helperc_b_load8)( Addr a ) {
   5825    OCacheLine* line;
   5826    UChar descrLo, descrHi, descr;
   5827    UWord lineoff;
   5828 
   5829    if (UNLIKELY(a & 7)) {
   5830       /* Handle misaligned case, slowly. */
   5831       UInt oLo   = (UInt)MC_(helperc_b_load4)( a + 0 );
   5832       UInt oHi   = (UInt)MC_(helperc_b_load4)( a + 4 );
   5833       return merge_origins(oLo, oHi);
   5834    }
   5835 
   5836    lineoff = oc_line_offset(a);
   5837    if (OC_ENABLE_ASSERTIONS) {
   5838       tl_assert(lineoff == (lineoff & 6)); /*0,2,4,6*//*since 8-aligned*/
   5839    }
   5840 
   5841    line = find_OCacheLine( a );
   5842 
   5843    descrLo = line->descr[lineoff + 0];
   5844    descrHi = line->descr[lineoff + 1];
   5845    descr   = descrLo | descrHi;
   5846    if (OC_ENABLE_ASSERTIONS) {
   5847       tl_assert(descr < 0x10);
   5848    }
   5849 
   5850    if (LIKELY(0 == descr)) {
   5851       return 0; /* both 32-bit chunks are defined */
   5852    } else {
   5853       UInt oLo = descrLo == 0 ? 0 : line->w32[lineoff + 0];
   5854       UInt oHi = descrHi == 0 ? 0 : line->w32[lineoff + 1];
   5855       return merge_origins(oLo, oHi);
   5856    }
   5857 }
   5858 
   5859 UWord VG_REGPARM(1) MC_(helperc_b_load16)( Addr a ) {
   5860    UInt oLo   = (UInt)MC_(helperc_b_load8)( a + 0 );
   5861    UInt oHi   = (UInt)MC_(helperc_b_load8)( a + 8 );
   5862    UInt oBoth = merge_origins(oLo, oHi);
   5863    return (UWord)oBoth;
   5864 }
   5865 
   5866 UWord VG_REGPARM(1) MC_(helperc_b_load32)( Addr a ) {
   5867    UInt oQ0   = (UInt)MC_(helperc_b_load8)( a + 0 );
   5868    UInt oQ1   = (UInt)MC_(helperc_b_load8)( a + 8 );
   5869    UInt oQ2   = (UInt)MC_(helperc_b_load8)( a + 16 );
   5870    UInt oQ3   = (UInt)MC_(helperc_b_load8)( a + 24 );
   5871    UInt oAll  = merge_origins(merge_origins(oQ0, oQ1),
   5872                               merge_origins(oQ2, oQ3));
   5873    return (UWord)oAll;
   5874 }
   5875 
   5876 
   5877 /*--------------------------------------------*/
   5878 /*--- Origin tracking: store handlers      ---*/
   5879 /*--------------------------------------------*/
   5880 
   5881 void VG_REGPARM(2) MC_(helperc_b_store1)( Addr a, UWord d32 ) {
   5882    OCacheLine* line;
   5883    UWord lineoff = oc_line_offset(a);
   5884    UWord byteoff = a & 3; /* 0, 1, 2 or 3 */
   5885 
   5886    if (OC_ENABLE_ASSERTIONS) {
   5887       tl_assert(lineoff >= 0 && lineoff < OC_W32S_PER_LINE);
   5888    }
   5889 
   5890    line = find_OCacheLine( a );
   5891 
   5892    if (d32 == 0) {
   5893       line->descr[lineoff] &= ~(1 << byteoff);
   5894    } else {
   5895       line->descr[lineoff] |= (1 << byteoff);
   5896       line->w32[lineoff] = d32;
   5897    }
   5898 }
   5899 
   5900 void VG_REGPARM(2) MC_(helperc_b_store2)( Addr a, UWord d32 ) {
   5901    OCacheLine* line;
   5902    UWord lineoff, byteoff;
   5903 
   5904    if (UNLIKELY(a & 1)) {
   5905       /* Handle misaligned case, slowly. */
   5906       MC_(helperc_b_store1)( a + 0, d32 );
   5907       MC_(helperc_b_store1)( a + 1, d32 );
   5908       return;
   5909    }
   5910 
   5911    lineoff = oc_line_offset(a);
   5912    byteoff = a & 3; /* 0 or 2 */
   5913 
   5914    if (OC_ENABLE_ASSERTIONS) {
   5915       tl_assert(lineoff >= 0 && lineoff < OC_W32S_PER_LINE);
   5916    }
   5917 
   5918    line = find_OCacheLine( a );
   5919 
   5920    if (d32 == 0) {
   5921       line->descr[lineoff] &= ~(3 << byteoff);
   5922    } else {
   5923       line->descr[lineoff] |= (3 << byteoff);
   5924       line->w32[lineoff] = d32;
   5925    }
   5926 }
   5927 
   5928 void VG_REGPARM(2) MC_(helperc_b_store4)( Addr a, UWord d32 ) {
   5929    OCacheLine* line;
   5930    UWord lineoff;
   5931 
   5932    if (UNLIKELY(a & 3)) {
   5933       /* Handle misaligned case, slowly. */
   5934       MC_(helperc_b_store2)( a + 0, d32 );
   5935       MC_(helperc_b_store2)( a + 2, d32 );
   5936       return;
   5937    }
   5938 
   5939    lineoff = oc_line_offset(a);
   5940    if (OC_ENABLE_ASSERTIONS) {
   5941       tl_assert(lineoff >= 0 && lineoff < OC_W32S_PER_LINE);
   5942    }
   5943 
   5944    line = find_OCacheLine( a );
   5945 
   5946    if (d32 == 0) {
   5947       line->descr[lineoff] = 0;
   5948    } else {
   5949       line->descr[lineoff] = 0xF;
   5950       line->w32[lineoff] = d32;
   5951    }
   5952 }
   5953 
   5954 void VG_REGPARM(2) MC_(helperc_b_store8)( Addr a, UWord d32 ) {
   5955    OCacheLine* line;
   5956    UWord lineoff;
   5957 
   5958    if (UNLIKELY(a & 7)) {
   5959       /* Handle misaligned case, slowly. */
   5960       MC_(helperc_b_store4)( a + 0, d32 );
   5961       MC_(helperc_b_store4)( a + 4, d32 );
   5962       return;
   5963    }
   5964 
   5965    lineoff = oc_line_offset(a);
   5966    if (OC_ENABLE_ASSERTIONS) {
   5967       tl_assert(lineoff == (lineoff & 6)); /*0,2,4,6*//*since 8-aligned*/
   5968    }
   5969 
   5970    line = find_OCacheLine( a );
   5971 
   5972    if (d32 == 0) {
   5973       line->descr[lineoff + 0] = 0;
   5974       line->descr[lineoff + 1] = 0;
   5975    } else {
   5976       line->descr[lineoff + 0] = 0xF;
   5977       line->descr[lineoff + 1] = 0xF;
   5978       line->w32[lineoff + 0] = d32;
   5979       line->w32[lineoff + 1] = d32;
   5980    }
   5981 }
   5982 
   5983 void VG_REGPARM(2) MC_(helperc_b_store16)( Addr a, UWord d32 ) {
   5984    MC_(helperc_b_store8)( a + 0, d32 );
   5985    MC_(helperc_b_store8)( a + 8, d32 );
   5986 }
   5987 
   5988 void VG_REGPARM(2) MC_(helperc_b_store32)( Addr a, UWord d32 ) {
   5989    MC_(helperc_b_store8)( a +  0, d32 );
   5990    MC_(helperc_b_store8)( a +  8, d32 );
   5991    MC_(helperc_b_store8)( a + 16, d32 );
   5992    MC_(helperc_b_store8)( a + 24, d32 );
   5993 }
   5994 
   5995 
   5996 /*--------------------------------------------*/
   5997 /*--- Origin tracking: sarp handlers       ---*/
   5998 /*--------------------------------------------*/
   5999 
   6000 __attribute__((noinline))
   6001 static void ocache_sarp_Set_Origins ( Addr a, UWord len, UInt otag ) {
   6002    if ((a & 1) && len >= 1) {
   6003       MC_(helperc_b_store1)( a, otag );
   6004       a++;
   6005       len--;
   6006    }
   6007    if ((a & 2) && len >= 2) {
   6008       MC_(helperc_b_store2)( a, otag );
   6009       a += 2;
   6010       len -= 2;
   6011    }
   6012    if (len >= 4)
   6013       tl_assert(0 == (a & 3));
   6014    while (len >= 4) {
   6015       MC_(helperc_b_store4)( a, otag );
   6016       a += 4;
   6017       len -= 4;
   6018    }
   6019    if (len >= 2) {
   6020       MC_(helperc_b_store2)( a, otag );
   6021       a += 2;
   6022       len -= 2;
   6023    }
   6024    if (len >= 1) {
   6025       MC_(helperc_b_store1)( a, otag );
   6026       //a++;
   6027       len--;
   6028    }
   6029    tl_assert(len == 0);
   6030 }
   6031 
   6032 __attribute__((noinline))
   6033 static void ocache_sarp_Clear_Origins ( Addr a, UWord len ) {
   6034    if ((a & 1) && len >= 1) {
   6035       MC_(helperc_b_store1)( a, 0 );
   6036       a++;
   6037       len--;
   6038    }
   6039    if ((a & 2) && len >= 2) {
   6040       MC_(helperc_b_store2)( a, 0 );
   6041       a += 2;
   6042       len -= 2;
   6043    }
   6044    if (len >= 4)
   6045       tl_assert(0 == (a & 3));
   6046    while (len >= 4) {
   6047       MC_(helperc_b_store4)( a, 0 );
   6048       a += 4;
   6049       len -= 4;
   6050    }
   6051    if (len >= 2) {
   6052       MC_(helperc_b_store2)( a, 0 );
   6053       a += 2;
   6054       len -= 2;
   6055    }
   6056    if (len >= 1) {
   6057       MC_(helperc_b_store1)( a, 0 );
   6058       //a++;
   6059       len--;
   6060    }
   6061    tl_assert(len == 0);
   6062 }
   6063 
   6064 
   6065 /*------------------------------------------------------------*/
   6066 /*--- Setup and finalisation                               ---*/
   6067 /*------------------------------------------------------------*/
   6068 
   6069 static void mc_post_clo_init ( void )
   6070 {
   6071    /* If we've been asked to emit XML, mash around various other
   6072       options so as to constrain the output somewhat. */
   6073    if (VG_(clo_xml)) {
   6074       /* Extract as much info as possible from the leak checker. */
   6075       /* MC_(clo_show_reachable) = True; */
   6076       MC_(clo_leak_check) = LC_Full;
   6077    }
   6078 
   6079    if (MC_(clo_freelist_big_blocks) >= MC_(clo_freelist_vol))
   6080       VG_(message)(Vg_UserMsg,
   6081                    "Warning: --freelist-big-blocks value %lld has no effect\n"
   6082                    "as it is >= to --freelist-vol value %lld\n",
   6083                    MC_(clo_freelist_big_blocks),
   6084                    MC_(clo_freelist_vol));
   6085 
   6086    tl_assert( MC_(clo_mc_level) >= 1 && MC_(clo_mc_level) <= 3 );
   6087 
   6088    if (MC_(clo_mc_level) == 3) {
   6089       /* We're doing origin tracking. */
   6090 #     ifdef PERF_FAST_STACK
   6091       VG_(track_new_mem_stack_4_w_ECU)   ( mc_new_mem_stack_4_w_ECU   );
   6092       VG_(track_new_mem_stack_8_w_ECU)   ( mc_new_mem_stack_8_w_ECU   );
   6093       VG_(track_new_mem_stack_12_w_ECU)  ( mc_new_mem_stack_12_w_ECU  );
   6094       VG_(track_new_mem_stack_16_w_ECU)  ( mc_new_mem_stack_16_w_ECU  );
   6095       VG_(track_new_mem_stack_32_w_ECU)  ( mc_new_mem_stack_32_w_ECU  );
   6096       VG_(track_new_mem_stack_112_w_ECU) ( mc_new_mem_stack_112_w_ECU );
   6097       VG_(track_new_mem_stack_128_w_ECU) ( mc_new_mem_stack_128_w_ECU );
   6098       VG_(track_new_mem_stack_144_w_ECU) ( mc_new_mem_stack_144_w_ECU );
   6099       VG_(track_new_mem_stack_160_w_ECU) ( mc_new_mem_stack_160_w_ECU );
   6100 #     endif
   6101       VG_(track_new_mem_stack_w_ECU)     ( mc_new_mem_stack_w_ECU     );
   6102    } else {
   6103       /* Not doing origin tracking */
   6104 #     ifdef PERF_FAST_STACK
   6105       VG_(track_new_mem_stack_4)   ( mc_new_mem_stack_4   );
   6106       VG_(track_new_mem_stack_8)   ( mc_new_mem_stack_8   );
   6107       VG_(track_new_mem_stack_12)  ( mc_new_mem_stack_12  );
   6108       VG_(track_new_mem_stack_16)  ( mc_new_mem_stack_16  );
   6109       VG_(track_new_mem_stack_32)  ( mc_new_mem_stack_32  );
   6110       VG_(track_new_mem_stack_112) ( mc_new_mem_stack_112 );
   6111       VG_(track_new_mem_stack_128) ( mc_new_mem_stack_128 );
   6112       VG_(track_new_mem_stack_144) ( mc_new_mem_stack_144 );
   6113       VG_(track_new_mem_stack_160) ( mc_new_mem_stack_160 );
   6114 #     endif
   6115       VG_(track_new_mem_stack)     ( mc_new_mem_stack     );
   6116    }
   6117 
   6118    /* This origin tracking cache is huge (~100M), so only initialise
   6119       if we need it. */
   6120    if (MC_(clo_mc_level) >= 3) {
   6121       init_OCache();
   6122       tl_assert(ocacheL1 != NULL);
   6123       tl_assert(ocacheL2 != NULL);
   6124    } else {
   6125       tl_assert(ocacheL1 == NULL);
   6126       tl_assert(ocacheL2 == NULL);
   6127    }
   6128 
   6129    /* Do not check definedness of guest state if --undef-value-errors=no */
   6130    if (MC_(clo_mc_level) >= 2)
   6131       VG_(track_pre_reg_read) ( mc_pre_reg_read );
   6132 }
   6133 
   6134 static void print_SM_info(char* type, int n_SMs)
   6135 {
   6136    VG_(message)(Vg_DebugMsg,
   6137       " memcheck: SMs: %s = %d (%ldk, %ldM)\n",
   6138       type,
   6139       n_SMs,
   6140       n_SMs * sizeof(SecMap) / 1024UL,
   6141       n_SMs * sizeof(SecMap) / (1024 * 1024UL) );
   6142 }
   6143 
   6144 static void mc_fini ( Int exitcode )
   6145 {
   6146    MC_(print_malloc_stats)();
   6147 
   6148    if (MC_(clo_leak_check) != LC_Off) {
   6149       LeakCheckParams lcp;
   6150       lcp.mode = MC_(clo_leak_check);
   6151       lcp.show_reachable = MC_(clo_show_reachable);
   6152       lcp.show_possibly_lost = MC_(clo_show_possibly_lost);
   6153       lcp.deltamode = LCD_Any;
   6154       lcp.max_loss_records_output = 999999999;
   6155       lcp.requested_by_monitor_command = False;
   6156       MC_(detect_memory_leaks)(1/*bogus ThreadId*/, &lcp);
   6157    } else {
   6158       if (VG_(clo_verbosity) == 1 && !VG_(clo_xml)) {
   6159          VG_(umsg)(
   6160             "For a detailed leak analysis, rerun with: --leak-check=full\n"
   6161             "\n"
   6162          );
   6163       }
   6164    }
   6165 
   6166    if (VG_(clo_verbosity) == 1 && !VG_(clo_xml)) {
   6167       VG_(message)(Vg_UserMsg,
   6168                    "For counts of detected and suppressed errors, rerun with: -v\n");
   6169    }
   6170 
   6171    if (MC_(any_value_errors) && !VG_(clo_xml) && VG_(clo_verbosity) >= 1
   6172        && MC_(clo_mc_level) == 2) {
   6173       VG_(message)(Vg_UserMsg,
   6174                    "Use --track-origins=yes to see where "
   6175                    "uninitialised values come from\n");
   6176    }
   6177 
   6178    done_prof_mem();
   6179 
   6180    if (VG_(clo_stats)) {
   6181       SizeT max_secVBit_szB, max_SMs_szB, max_shmem_szB;
   6182 
   6183       VG_(message)(Vg_DebugMsg,
   6184          " memcheck: sanity checks: %d cheap, %d expensive\n",
   6185          n_sanity_cheap, n_sanity_expensive );
   6186       VG_(message)(Vg_DebugMsg,
   6187          " memcheck: auxmaps: %lld auxmap entries (%lldk, %lldM) in use\n",
   6188          n_auxmap_L2_nodes,
   6189          n_auxmap_L2_nodes * 64,
   6190          n_auxmap_L2_nodes / 16 );
   6191       VG_(message)(Vg_DebugMsg,
   6192          " memcheck: auxmaps_L1: %lld searches, %lld cmps, ratio %lld:10\n",
   6193          n_auxmap_L1_searches, n_auxmap_L1_cmps,
   6194          (10ULL * n_auxmap_L1_cmps)
   6195             / (n_auxmap_L1_searches ? n_auxmap_L1_searches : 1)
   6196       );
   6197       VG_(message)(Vg_DebugMsg,
   6198          " memcheck: auxmaps_L2: %lld searches, %lld nodes\n",
   6199          n_auxmap_L2_searches, n_auxmap_L2_nodes
   6200       );
   6201 
   6202       print_SM_info("n_issued     ", n_issued_SMs);
   6203       print_SM_info("n_deissued   ", n_deissued_SMs);
   6204       print_SM_info("max_noaccess ", max_noaccess_SMs);
   6205       print_SM_info("max_undefined", max_undefined_SMs);
   6206       print_SM_info("max_defined  ", max_defined_SMs);
   6207       print_SM_info("max_non_DSM  ", max_non_DSM_SMs);
   6208 
   6209       // Three DSMs, plus the non-DSM ones
   6210       max_SMs_szB = (3 + max_non_DSM_SMs) * sizeof(SecMap);
   6211       // The 3*sizeof(Word) bytes is the AVL node metadata size.
   6212       // The VG_ROUNDUP is because the OSet pool allocator will/must align
   6213       // the elements on pointer size.
   6214       // Note that the pool allocator has some additional small overhead
   6215       // which is not counted in the below.
   6216       // Hardwiring this logic sucks, but I don't see how else to do it.
   6217       max_secVBit_szB = max_secVBit_nodes *
   6218             (3*sizeof(Word) + VG_ROUNDUP(sizeof(SecVBitNode), sizeof(void*)));
   6219       max_shmem_szB   = sizeof(primary_map) + max_SMs_szB + max_secVBit_szB;
   6220 
   6221       VG_(message)(Vg_DebugMsg,
   6222          " memcheck: max sec V bit nodes:    %d (%ldk, %ldM)\n",
   6223          max_secVBit_nodes, max_secVBit_szB / 1024,
   6224                             max_secVBit_szB / (1024 * 1024));
   6225       VG_(message)(Vg_DebugMsg,
   6226          " memcheck: set_sec_vbits8 calls: %llu (new: %llu, updates: %llu)\n",
   6227          sec_vbits_new_nodes + sec_vbits_updates,
   6228          sec_vbits_new_nodes, sec_vbits_updates );
   6229       VG_(message)(Vg_DebugMsg,
   6230          " memcheck: max shadow mem size:   %ldk, %ldM\n",
   6231          max_shmem_szB / 1024, max_shmem_szB / (1024 * 1024));
   6232 
   6233       if (MC_(clo_mc_level) >= 3) {
   6234          VG_(message)(Vg_DebugMsg,
   6235                       " ocacheL1: %'12lu refs   %'12lu misses (%'lu lossage)\n",
   6236                       stats_ocacheL1_find,
   6237                       stats_ocacheL1_misses,
   6238                       stats_ocacheL1_lossage );
   6239          VG_(message)(Vg_DebugMsg,
   6240                       " ocacheL1: %'12lu at 0   %'12lu at 1\n",
   6241                       stats_ocacheL1_find - stats_ocacheL1_misses
   6242                          - stats_ocacheL1_found_at_1
   6243                          - stats_ocacheL1_found_at_N,
   6244                       stats_ocacheL1_found_at_1 );
   6245          VG_(message)(Vg_DebugMsg,
   6246                       " ocacheL1: %'12lu at 2+  %'12lu move-fwds\n",
   6247                       stats_ocacheL1_found_at_N,
   6248                       stats_ocacheL1_movefwds );
   6249          VG_(message)(Vg_DebugMsg,
   6250                       " ocacheL1: %'12lu sizeB  %'12u useful\n",
   6251                       (UWord)sizeof(OCache),
   6252                       4 * OC_W32S_PER_LINE * OC_LINES_PER_SET * OC_N_SETS );
   6253          VG_(message)(Vg_DebugMsg,
   6254                       " ocacheL2: %'12lu refs   %'12lu misses\n",
   6255                       stats__ocacheL2_refs,
   6256                       stats__ocacheL2_misses );
   6257          VG_(message)(Vg_DebugMsg,
   6258                       " ocacheL2:    %'9lu max nodes %'9lu curr nodes\n",
   6259                       stats__ocacheL2_n_nodes_max,
   6260                       stats__ocacheL2_n_nodes );
   6261          VG_(message)(Vg_DebugMsg,
   6262                       " niacache: %'12lu refs   %'12lu misses\n",
   6263                       stats__nia_cache_queries, stats__nia_cache_misses);
   6264       } else {
   6265          tl_assert(ocacheL1 == NULL);
   6266          tl_assert(ocacheL2 == NULL);
   6267       }
   6268    }
   6269 
   6270    if (0) {
   6271       VG_(message)(Vg_DebugMsg,
   6272         "------ Valgrind's client block stats follow ---------------\n" );
   6273       show_client_block_stats();
   6274    }
   6275 }
   6276 
   6277 /* mark the given addr/len unaddressable for watchpoint implementation
   6278    The PointKind will be handled at access time */
   6279 static Bool mc_mark_unaddressable_for_watchpoint (PointKind kind, Bool insert,
   6280                                                   Addr addr, SizeT len)
   6281 {
    6282    /* GDBTD: this is somewhat fishy.  We should probably save the previous
    6283       accessibility and definedness in gdbserver so that they can be restored
    6284       properly.  Currently we assume that the user only watches things
    6285       which are properly addressable and defined. */
   6286    if (insert)
   6287       MC_(make_mem_noaccess) (addr, len);
   6288    else
   6289       MC_(make_mem_defined)  (addr, len);
   6290    return True;
   6291 }
   6292 
   6293 static void mc_pre_clo_init(void)
   6294 {
   6295    VG_(details_name)            ("Memcheck");
   6296    VG_(details_version)         (NULL);
   6297    VG_(details_description)     ("a memory error detector");
   6298    VG_(details_copyright_author)(
   6299       "Copyright (C) 2002-2012, and GNU GPL'd, by Julian Seward et al.");
   6300    VG_(details_bug_reports_to)  (VG_BUGS_TO);
   6301    VG_(details_avg_translation_sizeB) ( 640 );
   6302 
   6303    VG_(basic_tool_funcs)          (mc_post_clo_init,
   6304                                    MC_(instrument),
   6305                                    mc_fini);
   6306 
   6307    VG_(needs_final_IR_tidy_pass)  ( MC_(final_tidy) );
   6308 
   6309 
   6310    VG_(needs_core_errors)         ();
   6311    VG_(needs_tool_errors)         (MC_(eq_Error),
   6312                                    MC_(before_pp_Error),
   6313                                    MC_(pp_Error),
   6314                                    True,/*show TIDs for errors*/
   6315                                    MC_(update_Error_extra),
   6316                                    MC_(is_recognised_suppression),
   6317                                    MC_(read_extra_suppression_info),
   6318                                    MC_(error_matches_suppression),
   6319                                    MC_(get_error_name),
   6320                                    MC_(get_extra_suppression_info));
   6321    VG_(needs_libc_freeres)        ();
   6322    VG_(needs_command_line_options)(mc_process_cmd_line_options,
   6323                                    mc_print_usage,
   6324                                    mc_print_debug_usage);
   6325    VG_(needs_client_requests)     (mc_handle_client_request);
   6326    VG_(needs_sanity_checks)       (mc_cheap_sanity_check,
   6327                                    mc_expensive_sanity_check);
   6328    VG_(needs_malloc_replacement)  (MC_(malloc),
   6329                                    MC_(__builtin_new),
   6330                                    MC_(__builtin_vec_new),
   6331                                    MC_(memalign),
   6332                                    MC_(calloc),
   6333                                    MC_(free),
   6334                                    MC_(__builtin_delete),
   6335                                    MC_(__builtin_vec_delete),
   6336                                    MC_(realloc),
   6337                                    MC_(malloc_usable_size),
   6338                                    MC_MALLOC_DEFAULT_REDZONE_SZB );
   6339    MC_(Malloc_Redzone_SzB) = VG_(malloc_effective_client_redzone_size)();
   6340 
   6341    VG_(needs_xml_output)          ();
   6342 
   6343    VG_(track_new_mem_startup)     ( mc_new_mem_startup );
   6344    VG_(track_new_mem_stack_signal)( make_mem_undefined_w_tid );
   6345    // We assume that brk()/sbrk() does not initialise new memory.  Is this
   6346    // accurate?  John Reiser says:
   6347    //
   6348    //   0) sbrk() can *decrease* process address space.  No zero fill is done
   6349    //   for a decrease, not even the fragment on the high end of the last page
   6350    //   that is beyond the new highest address.  For maximum safety and
   6351    //   portability, then the bytes in the last page that reside above [the
   6352    //   new] sbrk(0) should be considered to be uninitialized, but in practice
   6353    //   it is exceedingly likely that they will retain their previous
   6354    //   contents.
   6355    //
   6356    //   1) If an increase is large enough to require new whole pages, then
   6357    //   those new whole pages (like all new pages) are zero-filled by the
   6358    //   operating system.  So if sbrk(0) already is page aligned, then
   6359    //   sbrk(PAGE_SIZE) *does* zero-fill the new memory.
   6360    //
   6361    //   2) Any increase that lies within an existing allocated page is not
   6362    //   changed.  So if (x = sbrk(0)) is not page aligned, then
   6363    //   sbrk(PAGE_SIZE) yields ((PAGE_SIZE -1) & -x) bytes which keep their
   6364    //   existing contents, and an additional PAGE_SIZE bytes which are zeroed.
   6365    //   ((PAGE_SIZE -1) & x) of them are "covered" by the sbrk(), and the rest
   6366    //   of them come along for the ride because the operating system deals
   6367    //   only in whole pages.  Again, for maximum safety and portability, then
   6368    //   anything that lives above [the new] sbrk(0) should be considered
   6369    //   uninitialized, but in practice will retain previous contents [zero in
   6370    //   this case.]"
   6371    //
   6372    // In short:
   6373    //
   6374    //   A key property of sbrk/brk is that new whole pages that are supplied
   6375    //   by the operating system *do* get initialized to zero.
   6376    //
   6377    // As for the portability of all this:
   6378    //
   6379    //   sbrk and brk are not POSIX.  However, any system that is a derivative
    6380    //   of *nix has sbrk and brk because too much software (such as
    6381    //   the Bourne shell) relies on the traditional memory map (.text,
   6382    //   .data+.bss, stack) and the existence of sbrk/brk.
   6383    //
   6384    // So we should arguably observe all this.  However:
   6385    // - The current inaccuracy has caused maybe one complaint in seven years(?)
   6386    // - Relying on the zeroed-ness of whole brk'd pages is pretty grotty... I
   6387    //   doubt most programmers know the above information.
   6388    // So I'm not terribly unhappy with marking it as undefined. --njn.
   6389    //
   6390    // [More:  I think most of what John said only applies to sbrk().  It seems
   6391    // that brk() always deals in whole pages.  And since this event deals
   6392    // directly with brk(), not with sbrk(), perhaps it would be reasonable to
   6393    // just mark all memory it allocates as defined.]
   6394    //
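           // A worked illustration of point (2) above, using hypothetical
           // values (this example is not part of the original comment; it
           // assumes PAGE_SIZE == 4096 and a made-up break address):
           //
           //   x = sbrk(0);        // suppose x == 0x0804a100, not page aligned
           //   sbrk(PAGE_SIZE);    // grow the break by 4096 bytes
           //
           //   The requested range is [0x0804a100, 0x0804b100).  The 0xF00 bytes
           //   up to 0x0804b000 lie in the already-mapped page and keep whatever
           //   contents they had; the kernel then supplies one fresh, zero-filled
           //   page at 0x0804b000, of which 0x100 bytes are covered by the sbrk()
           //   and the remaining 0xF00 come along for the ride.
           //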
   6395    VG_(track_new_mem_brk)         ( make_mem_undefined_w_tid );
   6396 
   6397    // Handling of mmap and mprotect isn't simple (well, it is simple,
   6398    // but the justification isn't).  See the comments above, just prior to
   6399    // mc_new_mem_mmap.
   6400    VG_(track_new_mem_mmap)        ( mc_new_mem_mmap );
   6401    VG_(track_change_mem_mprotect) ( mc_new_mem_mprotect );
   6402 
   6403    VG_(track_copy_mem_remap)      ( MC_(copy_address_range_state) );
   6404 
   6405    VG_(track_die_mem_stack_signal)( MC_(make_mem_noaccess) );
   6406    VG_(track_die_mem_brk)         ( MC_(make_mem_noaccess) );
   6407    VG_(track_die_mem_munmap)      ( MC_(make_mem_noaccess) );
   6408 
   6409    /* Defer the specification of the new_mem_stack functions to the
   6410       post_clo_init function, since we need to first parse the command
   6411       line before deciding which set to use. */
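           /* Roughly what the deferred registration amounts to (a sketch only;
              whether the plain or the origin-tracking handlers get installed
              depends on MC_(clo_mc_level), and the handler names used here are
              assumptions for illustration, not quotes from mc_post_clo_init):

                 if (MC_(clo_mc_level) == 3)
                    VG_(track_new_mem_stack) ( mc_new_mem_stack_w_ECU );
                 else
                    VG_(track_new_mem_stack) ( mc_new_mem_stack );
           */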
   6412 
   6413 #  ifdef PERF_FAST_STACK
   6414    VG_(track_die_mem_stack_4)     ( mc_die_mem_stack_4   );
   6415    VG_(track_die_mem_stack_8)     ( mc_die_mem_stack_8   );
   6416    VG_(track_die_mem_stack_12)    ( mc_die_mem_stack_12  );
   6417    VG_(track_die_mem_stack_16)    ( mc_die_mem_stack_16  );
   6418    VG_(track_die_mem_stack_32)    ( mc_die_mem_stack_32  );
   6419    VG_(track_die_mem_stack_112)   ( mc_die_mem_stack_112 );
   6420    VG_(track_die_mem_stack_128)   ( mc_die_mem_stack_128 );
   6421    VG_(track_die_mem_stack_144)   ( mc_die_mem_stack_144 );
   6422    VG_(track_die_mem_stack_160)   ( mc_die_mem_stack_160 );
   6423 #  endif
   6424    VG_(track_die_mem_stack)       ( mc_die_mem_stack     );
   6425 
   6426    VG_(track_ban_mem_stack)       ( MC_(make_mem_noaccess) );
   6427 
   6428    VG_(track_pre_mem_read)        ( check_mem_is_defined );
   6429    VG_(track_pre_mem_read_asciiz) ( check_mem_is_defined_asciiz );
   6430    VG_(track_pre_mem_write)       ( check_mem_is_addressable );
   6431    VG_(track_post_mem_write)      ( mc_post_mem_write );
   6432 
   6433    VG_(track_post_reg_write)                  ( mc_post_reg_write );
   6434    VG_(track_post_reg_write_clientcall_return)( mc_post_reg_write_clientcall );
   6435 
   6436    VG_(needs_watchpoint)          ( mc_mark_unaddressable_for_watchpoint );
   6437 
   6438    init_shadow_memory();
   6439    MC_(chunk_poolalloc) = VG_(newPA) (sizeof(MC_Chunk),
   6440                                       1000,
   6441                                       VG_(malloc),
   6442                                       "mc.cMC.1 (MC_Chunk pools)",
   6443                                       VG_(free));
   6444    MC_(malloc_list)  = VG_(HT_construct)( "MC_(malloc_list)" );
   6445    MC_(mempool_list) = VG_(HT_construct)( "MC_(mempool_list)" );
   6446    init_prof_mem();
   6447 
   6448    tl_assert( mc_expensive_sanity_check() );
   6449 
   6450    // {LOADV,STOREV}[8421] will all fail horribly if this isn't true.
   6451    tl_assert(sizeof(UWord) == sizeof(Addr));
   6452    // Call me paranoid.  I don't care.
   6453    tl_assert(sizeof(void*) == sizeof(Addr));
   6454 
   6455    // BYTES_PER_SEC_VBIT_NODE must be a power of two.
   6456    tl_assert(-1 != VG_(log2)(BYTES_PER_SEC_VBIT_NODE));
   6457 
   6458    /* This is small.  Always initialise it. */
   6459    init_nia_to_ecu_cache();
   6460 
   6461    /* We can't initialise ocacheL1/ocacheL2 yet, because the command
   6462       line args haven't been processed, so we don't yet know whether
   6463       we need them.  Hence defer it to mc_post_clo_init. */
   6464    tl_assert(ocacheL1 == NULL);
   6465    tl_assert(ocacheL2 == NULL);
   6466 
   6467    /* Check the word-size-dependent constants (MAX_PRIMARY_ADDRESS, the
   6468       MASK values).  See extensive comments above re UNALIGNED_OR_HIGH. */
   6469 #  if VG_WORDSIZE == 4
   6470    tl_assert(sizeof(void*) == 4);
   6471    tl_assert(sizeof(Addr)  == 4);
   6472    tl_assert(sizeof(UWord) == 4);
   6473    tl_assert(sizeof(Word)  == 4);
   6474    tl_assert(MAX_PRIMARY_ADDRESS == 0xFFFFFFFFUL);
   6475    tl_assert(MASK(1) == 0UL);
   6476    tl_assert(MASK(2) == 1UL);
   6477    tl_assert(MASK(4) == 3UL);
   6478    tl_assert(MASK(8) == 7UL);
   6479 #  else
   6480    tl_assert(VG_WORDSIZE == 8);
   6481    tl_assert(sizeof(void*) == 8);
   6482    tl_assert(sizeof(Addr)  == 8);
   6483    tl_assert(sizeof(UWord) == 8);
   6484    tl_assert(sizeof(Word)  == 8);
   6485    tl_assert(MAX_PRIMARY_ADDRESS == 0x7FFFFFFFFULL);
   6486    tl_assert(MASK(1) == 0xFFFFFFF800000000ULL);
   6487    tl_assert(MASK(2) == 0xFFFFFFF800000001ULL);
   6488    tl_assert(MASK(4) == 0xFFFFFFF800000003ULL);
   6489    tl_assert(MASK(8) == 0xFFFFFFF800000007ULL);
   6490 #  endif
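           /* A concrete reading of the 64-bit constants just asserted
              (illustrative only; it assumes, per the comments earlier in this
              file, that the fast-path test is essentially (a & MASK(szB)) != 0
              for an access of szB bytes at address a):

                 MASK(8) == 0xFFFFFFF800000007ULL, so:
                    a == 0x10000      ->  a & MASK(8) == 0   (aligned and below
                                          32G: fast path)
                    a == 0x10003      ->  a & MASK(8) == 3   (misaligned:
                                          slow path)
                    a == 0x800000000  ->  a & MASK(8) != 0   (above
                                          MAX_PRIMARY_ADDRESS: slow path)
           */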
   6491 }
   6492 
   6493 VG_DETERMINE_INTERFACE_VERSION(mc_pre_clo_init)
   6494 
   6495 /*--------------------------------------------------------------------*/
   6496 /*--- end                                                mc_main.c ---*/
   6497 /*--------------------------------------------------------------------*/
   6498