      1 /* -*- mode: C; c-basic-offset: 3; -*- */
      2 
      3 /*--------------------------------------------------------------------*/
      4 /*--- MemCheck: Maintain bitmaps of memory, tracking the           ---*/
      5 /*--- accessibility (A) and validity (V) status of each byte.      ---*/
      6 /*---                                                    mc_main.c ---*/
      7 /*--------------------------------------------------------------------*/
      8 
      9 /*
     10    This file is part of MemCheck, a heavyweight Valgrind tool for
     11    detecting memory errors.
     12 
     13    Copyright (C) 2000-2015 Julian Seward
     14       jseward (at) acm.org
     15 
     16    This program is free software; you can redistribute it and/or
     17    modify it under the terms of the GNU General Public License as
     18    published by the Free Software Foundation; either version 2 of the
     19    License, or (at your option) any later version.
     20 
     21    This program is distributed in the hope that it will be useful, but
     22    WITHOUT ANY WARRANTY; without even the implied warranty of
     23    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
     24    General Public License for more details.
     25 
     26    You should have received a copy of the GNU General Public License
     27    along with this program; if not, write to the Free Software
     28    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
     29    02111-1307, USA.
     30 
     31    The GNU General Public License is contained in the file COPYING.
     32 */
     33 
     34 #include "pub_tool_basics.h"
     35 #include "pub_tool_aspacemgr.h"
     36 #include "pub_tool_gdbserver.h"
     37 #include "pub_tool_poolalloc.h"
     38 #include "pub_tool_hashtable.h"     // For mc_include.h
     39 #include "pub_tool_libcbase.h"
     40 #include "pub_tool_libcassert.h"
     41 #include "pub_tool_libcprint.h"
     42 #include "pub_tool_machine.h"
     43 #include "pub_tool_mallocfree.h"
     44 #include "pub_tool_options.h"
     45 #include "pub_tool_oset.h"
     46 #include "pub_tool_rangemap.h"
     47 #include "pub_tool_replacemalloc.h"
     48 #include "pub_tool_tooliface.h"
     49 #include "pub_tool_threadstate.h"
     50 
     51 #include "mc_include.h"
     52 #include "memcheck.h"   /* for client requests */
     53 
     54 
     55 /* Set to 1 to enable handwritten assembly helpers on targets for
     56    which it is supported. */
     57 #define ENABLE_ASSEMBLY_HELPERS 1
     58 
     59 /* Set to 1 to do a little more sanity checking */
     60 #define VG_DEBUG_MEMORY 0
     61 
     62 #define DEBUG(fmt, args...) //VG_(printf)(fmt, ## args)
     63 
     64 static void ocache_sarp_Set_Origins ( Addr, UWord, UInt ); /* fwds */
     65 static void ocache_sarp_Clear_Origins ( Addr, UWord ); /* fwds */
     66 
     67 
     68 /*------------------------------------------------------------*/
     69 /*--- Fast-case knobs                                      ---*/
     70 /*------------------------------------------------------------*/
     71 
     72 // Comment these out to disable the fast cases (don't just set them to zero).
     73 
     74 #define PERF_FAST_LOADV    1
     75 #define PERF_FAST_STOREV   1
     76 
     77 #define PERF_FAST_SARP     1
     78 
     79 #define PERF_FAST_STACK    1
     80 #define PERF_FAST_STACK2   1
     81 
     82 /* Change this to 1 to enable assertions on origin tracking cache fast
     83    paths */
     84 #define OC_ENABLE_ASSERTIONS 0
     85 
     86 
     87 /*------------------------------------------------------------*/
     88 /*--- Comments on the origin tracking implementation       ---*/
     89 /*------------------------------------------------------------*/
     90 
     91 /* See detailed comment entitled
     92    AN OVERVIEW OF THE ORIGIN TRACKING IMPLEMENTATION
     93    which is contained further on in this file. */
     94 
     95 
     96 /*------------------------------------------------------------*/
     97 /*--- V bits and A bits                                    ---*/
     98 /*------------------------------------------------------------*/
     99 
    100 /* Conceptually, every byte value has 8 V bits, which track whether Memcheck
    101    thinks the corresponding value bit is defined.  And every memory byte
    102    has an A bit, which tracks whether Memcheck thinks the program can access
    103    it safely (ie. it's mapped, and has at least one of the RWX permission bits
    104    set).  So every N-bit register is shadowed with N V bits, and every memory
    105    byte is shadowed with 8 V bits and one A bit.
    106 
    107    In the implementation, we use two forms of compression (compressed V bits
    108    and distinguished secondary maps) to avoid the 9-bit-per-byte overhead
    109    for memory.
    110 
    111    Memcheck also tracks extra information about each heap block that is
    112    allocated, for detecting memory leaks and other purposes.
    113 */
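
/* Illustrative arithmetic (not part of the implementation): stored
   naively, that shadow state is 8 V bits + 1 A bit = 9 bits per byte
   of memory, i.e. roughly a 112% space overhead.  The compressed
   2-bit-per-byte encoding described below reduces this to 25%, and
   the distinguished secondary maps reduce the cost of a uniform 64kB
   region to a single primary-map pointer. */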
    114 
    115 /*------------------------------------------------------------*/
    116 /*--- Basic A/V bitmap representation.                     ---*/
    117 /*------------------------------------------------------------*/
    118 
    119 /* All reads and writes are checked against a memory map (a.k.a. shadow
    120    memory), which records the state of all memory in the process.
    121 
    122    On 32-bit machines the memory map is organised as follows.
    123    The top 16 bits of an address are used to index into a top-level
    124    map table, containing 65536 entries.  Each entry is a pointer to a
     125    second-level map, which records the accessibility and validity
    126    permissions for the 65536 bytes indexed by the lower 16 bits of the
    127    address.  Each byte is represented by two bits (details are below).  So
    128    each second-level map contains 16384 bytes.  This two-level arrangement
    129    conveniently divides the 4G address space into 64k lumps, each size 64k
    130    bytes.
    131 
    132    All entries in the primary (top-level) map must point to a valid
    133    secondary (second-level) map.  Since many of the 64kB chunks will
    134    have the same status for every bit -- ie. noaccess (for unused
    135    address space) or entirely addressable and defined (for code segments) --
    136    there are three distinguished secondary maps, which indicate 'noaccess',
    137    'undefined' and 'defined'.  For these uniform 64kB chunks, the primary
    138    map entry points to the relevant distinguished map.  In practice,
    139    typically more than half of the addressable memory is represented with
    140    the 'undefined' or 'defined' distinguished secondary map, so it gives a
    141    good saving.  It also lets us set the V+A bits of large address regions
    142    quickly in set_address_range_perms().
    143 
    144    On 64-bit machines it's more complicated.  If we followed the same basic
    145    scheme we'd have a four-level table which would require too many memory
    146    accesses.  So instead the top-level map table has 2^20 entries (indexed
    147    using bits 16..35 of the address);  this covers the bottom 64GB.  Any
    148    accesses above 64GB are handled with a slow, sparse auxiliary table.
    149    Valgrind's address space manager tries very hard to keep things below
    150    this 64GB barrier so that performance doesn't suffer too much.
    151 
    152    Note that this file has a lot of different functions for reading and
    153    writing shadow memory.  Only a couple are strictly necessary (eg.
     154    get_vabits2 and set_vabits2); the rest are just specialised for specific
    155    common cases to improve performance.
    156 
    157    Aside: the V+A bits are less precise than they could be -- we have no way
    158    of marking memory as read-only.  It would be great if we could add an
    159    extra state VA_BITSn_READONLY.  But then we'd have 5 different states,
    160    which requires 2.3 bits to hold, and there's no way to do that elegantly
    161    -- we'd have to double up to 4 bits of metadata per byte, which doesn't
    162    seem worth it.
    163 */
    164 
    165 /* --------------- Basic configuration --------------- */
    166 
    167 /* Only change this.  N_PRIMARY_MAP *must* be a power of 2. */
    168 
    169 #if VG_WORDSIZE == 4
    170 
    171 /* cover the entire address space */
    172 #  define N_PRIMARY_BITS  16
    173 
    174 #else
    175 
    176 /* Just handle the first 64G fast and the rest via auxiliary
    177    primaries.  If you change this, Memcheck will assert at startup.
    178    See the definition of UNALIGNED_OR_HIGH for extensive comments. */
    179 #  define N_PRIMARY_BITS  20
    180 
    181 #endif
    182 
    183 
    184 /* Do not change this. */
    185 #define N_PRIMARY_MAP  ( ((UWord)1) << N_PRIMARY_BITS)
    186 
    187 /* Do not change this. */
    188 #define MAX_PRIMARY_ADDRESS (Addr)((((Addr)65536) * N_PRIMARY_MAP)-1)
    189 
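/* Worked example (illustrative only, not part of the implementation):
   on a 64-bit target N_PRIMARY_BITS is 20, so the primary map has 2^20
   entries, each covering 64kB, and MAX_PRIMARY_ADDRESS is 2^36 - 1
   (the 64GB boundary).  A hypothetical address below that limit splits
   like this:

      Addr  a      = 0x200012345ULL; // below 64GB, so handled "low"
      UWord pm_off = a >> 16;        // 0x20001: index into primary_map[]
      UWord lo16   = a & 0xFFFF;     // 0x2345: byte within the 64kB chunk

   Addresses above MAX_PRIMARY_ADDRESS instead go through the auxiliary
   map (auxmap_L1 / auxmap_L2, defined below). */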
    190 
    191 /* --------------- Secondary maps --------------- */
    192 
    193 // Each byte of memory conceptually has an A bit, which indicates its
     194 // addressability, and 8 V bits, which indicate its definedness.
    195 //
    196 // But because very few bytes are partially defined, we can use a nice
    197 // compression scheme to reduce the size of shadow memory.  Each byte of
     198 // memory has 2 bits which indicate its state (ie. V+A bits):
    199 //
    200 //   00:  noaccess    (unaddressable but treated as fully defined)
    201 //   01:  undefined   (addressable and fully undefined)
    202 //   10:  defined     (addressable and fully defined)
    203 //   11:  partdefined (addressable and partially defined)
    204 //
    205 // In the "partdefined" case, we use a secondary table to store the V bits.
    206 // Each entry in the secondary-V-bits table maps a byte address to its 8 V
    207 // bits.
    208 //
    209 // We store the compressed V+A bits in 8-bit chunks, ie. the V+A bits for
    210 // four bytes (32 bits) of memory are in each chunk.  Hence the name
    211 // "vabits8".  This lets us get the V+A bits for four bytes at a time
    212 // easily (without having to do any shifting and/or masking), and that is a
    213 // very common operation.  (Note that although each vabits8 chunk
    214 // is 8 bits in size, it represents 32 bits of memory.)
    215 //
    216 // The representation is "inverse" little-endian... each 4 bytes of
    217 // memory is represented by a 1 byte value, where:
    218 //
    219 // - the status of byte (a+0) is held in bits [1..0]
    220 // - the status of byte (a+1) is held in bits [3..2]
    221 // - the status of byte (a+2) is held in bits [5..4]
    222 // - the status of byte (a+3) is held in bits [7..6]
    223 //
    224 // It's "inverse" because endianness normally describes a mapping from
    225 // value bits to memory addresses;  in this case the mapping is inverted.
    226 // Ie. instead of particular value bits being held in certain addresses, in
    227 // this case certain addresses are represented by particular value bits.
    228 // See insert_vabits2_into_vabits8() for an example.
    229 //
    230 // But note that we don't compress the V bits stored in registers;  they
     231 // need to be explicit to make the shadow operations possible.  Therefore
    232 // when moving values between registers and memory we need to convert
    233 // between the expanded in-register format and the compressed in-memory
    234 // format.  This isn't so difficult, it just requires careful attention in a
    235 // few places.
    236 
     237 // These represent 8 bits of memory.
    238 #define VA_BITS2_NOACCESS     0x0      // 00b
    239 #define VA_BITS2_UNDEFINED    0x1      // 01b
    240 #define VA_BITS2_DEFINED      0x2      // 10b
    241 #define VA_BITS2_PARTDEFINED  0x3      // 11b
    242 
    243 // These represent 16 bits of memory.
    244 #define VA_BITS4_NOACCESS     0x0      // 00_00b
    245 #define VA_BITS4_UNDEFINED    0x5      // 01_01b
    246 #define VA_BITS4_DEFINED      0xa      // 10_10b
    247 
    248 // These represent 32 bits of memory.
    249 #define VA_BITS8_NOACCESS     0x00     // 00_00_00_00b
    250 #define VA_BITS8_UNDEFINED    0x55     // 01_01_01_01b
    251 #define VA_BITS8_DEFINED      0xaa     // 10_10_10_10b
    252 
    253 // These represent 64 bits of memory.
    254 #define VA_BITS16_NOACCESS    0x0000   // 00_00_00_00b x 2
    255 #define VA_BITS16_UNDEFINED   0x5555   // 01_01_01_01b x 2
    256 #define VA_BITS16_DEFINED     0xaaaa   // 10_10_10_10b x 2
    257 
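// Example (illustrative only): suppose the vabits8 chunk for the four
// bytes at a 4-aligned address A holds 0x1b, i.e. binary 00_01_10_11.
// Reading the 2-bit fields from the bottom up:
//
//    bits [1..0] = 11b : byte A+0 is partdefined (see sec-V-bits table)
//    bits [3..2] = 10b : byte A+1 is defined
//    bits [5..4] = 01b : byte A+2 is undefined
//    bits [7..6] = 00b : byte A+3 is noaccess
//
// A fully defined aligned 32-bit word is therefore 10_10_10_10b, which
// is exactly VA_BITS8_DEFINED (0xaa) above.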
    258 
    259 #define SM_CHUNKS             16384    // Each SM covers 64k of memory.
    260 #define SM_OFF(aaa)           (((aaa) & 0xffff) >> 2)
    261 #define SM_OFF_16(aaa)        (((aaa) & 0xffff) >> 3)
    262 
    263 // Paranoia:  it's critical for performance that the requested inlining
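// Illustrative note (not part of the implementation): SM_OFF maps an
// address to the index of its vabits8 chunk within a SecMap.  The low
// 16 bits locate the byte within the 64kB the SecMap covers, and the
// >> 2 divides by 4 because each vabits8 chunk covers 4 bytes.  E.g.
// for an address ending in 0x2345, SM_OFF gives 0x2345 >> 2 == 0x8d1,
// and the low two bits (a & 3 == 1) then select the 2-bit field within
// that chunk (see insert_vabits2_into_vabits8 below).  SM_OFF_16
// divides by 8 instead, indexing 16-bit (UShort) groups that cover 8
// bytes each, as used by the 64-bit load/store fast paths.
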
    264 // occurs.  So try extra hard.
    265 #define INLINE    inline __attribute__((always_inline))
    266 
    267 static INLINE Addr start_of_this_sm ( Addr a ) {
    268    return (a & (~SM_MASK));
    269 }
    270 static INLINE Bool is_start_of_sm ( Addr a ) {
    271    return (start_of_this_sm(a) == a);
    272 }
    273 
    274 typedef
    275    struct {
    276       UChar vabits8[SM_CHUNKS];
    277    }
    278    SecMap;
    279 
    280 // 3 distinguished secondary maps, one for no-access, one for
    281 // accessible but undefined, and one for accessible and defined.
    282 // Distinguished secondaries may never be modified.
    283 #define SM_DIST_NOACCESS   0
    284 #define SM_DIST_UNDEFINED  1
    285 #define SM_DIST_DEFINED    2
    286 
    287 static SecMap sm_distinguished[3];
    288 
    289 static INLINE Bool is_distinguished_sm ( SecMap* sm ) {
    290    return sm >= &sm_distinguished[0] && sm <= &sm_distinguished[2];
    291 }
    292 
    293 // Forward declaration
    294 static void update_SM_counts(SecMap* oldSM, SecMap* newSM);
    295 
    296 /* dist_sm points to one of our three distinguished secondaries.  Make
    297    a copy of it so that we can write to it.
    298 */
    299 static SecMap* copy_for_writing ( SecMap* dist_sm )
    300 {
    301    SecMap* new_sm;
    302    tl_assert(dist_sm == &sm_distinguished[0]
    303           || dist_sm == &sm_distinguished[1]
    304           || dist_sm == &sm_distinguished[2]);
    305 
    306    new_sm = VG_(am_shadow_alloc)(sizeof(SecMap));
    307    if (new_sm == NULL)
    308       VG_(out_of_memory_NORETURN)( "memcheck:allocate new SecMap",
    309                                    sizeof(SecMap) );
    310    VG_(memcpy)(new_sm, dist_sm, sizeof(SecMap));
    311    update_SM_counts(dist_sm, new_sm);
    312    return new_sm;
    313 }
    314 
    315 /* --------------- Stats --------------- */
    316 
    317 static Int   n_issued_SMs      = 0;
    318 static Int   n_deissued_SMs    = 0;
    319 static Int   n_noaccess_SMs    = N_PRIMARY_MAP; // start with many noaccess DSMs
    320 static Int   n_undefined_SMs   = 0;
    321 static Int   n_defined_SMs     = 0;
    322 static Int   n_non_DSM_SMs     = 0;
    323 static Int   max_noaccess_SMs  = 0;
    324 static Int   max_undefined_SMs = 0;
    325 static Int   max_defined_SMs   = 0;
    326 static Int   max_non_DSM_SMs   = 0;
    327 
    328 /* # searches initiated in auxmap_L1, and # base cmps required */
    329 static ULong n_auxmap_L1_searches  = 0;
    330 static ULong n_auxmap_L1_cmps      = 0;
    331 /* # of searches that missed in auxmap_L1 and therefore had to
    332    be handed to auxmap_L2. And the number of nodes inserted. */
    333 static ULong n_auxmap_L2_searches  = 0;
    334 static ULong n_auxmap_L2_nodes     = 0;
    335 
    336 static Int   n_sanity_cheap     = 0;
    337 static Int   n_sanity_expensive = 0;
    338 
    339 static Int   n_secVBit_nodes   = 0;
    340 static Int   max_secVBit_nodes = 0;
    341 
    342 static void update_SM_counts(SecMap* oldSM, SecMap* newSM)
    343 {
    344    if      (oldSM == &sm_distinguished[SM_DIST_NOACCESS ]) n_noaccess_SMs --;
    345    else if (oldSM == &sm_distinguished[SM_DIST_UNDEFINED]) n_undefined_SMs--;
    346    else if (oldSM == &sm_distinguished[SM_DIST_DEFINED  ]) n_defined_SMs  --;
    347    else                                                  { n_non_DSM_SMs  --;
    348                                                            n_deissued_SMs ++; }
    349 
    350    if      (newSM == &sm_distinguished[SM_DIST_NOACCESS ]) n_noaccess_SMs ++;
    351    else if (newSM == &sm_distinguished[SM_DIST_UNDEFINED]) n_undefined_SMs++;
    352    else if (newSM == &sm_distinguished[SM_DIST_DEFINED  ]) n_defined_SMs  ++;
    353    else                                                  { n_non_DSM_SMs  ++;
    354                                                            n_issued_SMs   ++; }
    355 
    356    if (n_noaccess_SMs  > max_noaccess_SMs ) max_noaccess_SMs  = n_noaccess_SMs;
    357    if (n_undefined_SMs > max_undefined_SMs) max_undefined_SMs = n_undefined_SMs;
    358    if (n_defined_SMs   > max_defined_SMs  ) max_defined_SMs   = n_defined_SMs;
    359    if (n_non_DSM_SMs   > max_non_DSM_SMs  ) max_non_DSM_SMs   = n_non_DSM_SMs;
    360 }
    361 
    362 /* --------------- Primary maps --------------- */
    363 
    364 /* The main primary map.  This covers some initial part of the address
    365    space, addresses 0 .. (N_PRIMARY_MAP << 16)-1.  The rest of it is
    366    handled using the auxiliary primary map.
    367 */
    368 static SecMap* primary_map[N_PRIMARY_MAP];
    369 
    370 
    371 /* An entry in the auxiliary primary map.  base must be a 64k-aligned
    372    value, and sm points at the relevant secondary map.  As with the
    373    main primary map, the secondary may be either a real secondary, or
    374    one of the three distinguished secondaries.  DO NOT CHANGE THIS
    375    LAYOUT: the first word has to be the key for OSet fast lookups.
    376 */
    377 typedef
    378    struct {
    379       Addr    base;
    380       SecMap* sm;
    381    }
    382    AuxMapEnt;
    383 
    384 /* Tunable parameter: How big is the L1 queue? */
    385 #define N_AUXMAP_L1 24
    386 
    387 /* Tunable parameter: How far along the L1 queue to insert
    388    entries resulting from L2 lookups? */
    389 #define AUXMAP_L1_INSERT_IX 12
    390 
    391 static struct {
    392           Addr       base;
    393           AuxMapEnt* ent; // pointer to the matching auxmap_L2 node
    394        }
    395        auxmap_L1[N_AUXMAP_L1];
    396 
    397 static OSet* auxmap_L2 = NULL;
    398 
    399 static void init_auxmap_L1_L2 ( void )
    400 {
    401    Int i;
    402    for (i = 0; i < N_AUXMAP_L1; i++) {
    403       auxmap_L1[i].base = 0;
    404       auxmap_L1[i].ent  = NULL;
    405    }
    406 
    407    tl_assert(0 == offsetof(AuxMapEnt,base));
    408    tl_assert(sizeof(Addr) == sizeof(void*));
    409    auxmap_L2 = VG_(OSetGen_Create)( /*keyOff*/  offsetof(AuxMapEnt,base),
    410                                     /*fastCmp*/ NULL,
    411                                     VG_(malloc), "mc.iaLL.1", VG_(free) );
    412 }
    413 
    414 /* Check representation invariants; if OK return NULL; else a
    415    descriptive bit of text.  Also return the number of
    416    non-distinguished secondary maps referred to from the auxiliary
    417    primary maps. */
    418 
    419 static const HChar* check_auxmap_L1_L2_sanity ( Word* n_secmaps_found )
    420 {
    421    Word i, j;
    422    /* On a 32-bit platform, the L2 and L1 tables should
    423       both remain empty forever.
    424 
    425       On a 64-bit platform:
    426       In the L2 table:
    427        all .base & 0xFFFF == 0
    428        all .base > MAX_PRIMARY_ADDRESS
     429    In the L1 table:
     430     each entry is either
     431      (.base > MAX_PRIMARY_ADDRESS,
     432       .base & 0xFFFF == 0,
     433       and .ent points to an AuxMapEnt with the same .base)
     434     or
     435      (.base == 0 and .ent == NULL)
    436    */
    437    *n_secmaps_found = 0;
    438    if (sizeof(void*) == 4) {
    439       /* 32-bit platform */
    440       if (VG_(OSetGen_Size)(auxmap_L2) != 0)
    441          return "32-bit: auxmap_L2 is non-empty";
    442       for (i = 0; i < N_AUXMAP_L1; i++)
     443          if (auxmap_L1[i].base != 0 || auxmap_L1[i].ent != NULL)
     444             return "32-bit: auxmap_L1 is non-empty";
    445    } else {
    446       /* 64-bit platform */
    447       UWord elems_seen = 0;
    448       AuxMapEnt *elem, *res;
    449       AuxMapEnt key;
    450       /* L2 table */
    451       VG_(OSetGen_ResetIter)(auxmap_L2);
    452       while ( (elem = VG_(OSetGen_Next)(auxmap_L2)) ) {
    453          elems_seen++;
    454          if (0 != (elem->base & (Addr)0xFFFF))
    455             return "64-bit: nonzero .base & 0xFFFF in auxmap_L2";
    456          if (elem->base <= MAX_PRIMARY_ADDRESS)
    457             return "64-bit: .base <= MAX_PRIMARY_ADDRESS in auxmap_L2";
    458          if (elem->sm == NULL)
    459             return "64-bit: .sm in _L2 is NULL";
    460          if (!is_distinguished_sm(elem->sm))
    461             (*n_secmaps_found)++;
    462       }
    463       if (elems_seen != n_auxmap_L2_nodes)
    464          return "64-bit: disagreement on number of elems in _L2";
    465       /* Check L1-L2 correspondence */
    466       for (i = 0; i < N_AUXMAP_L1; i++) {
    467          if (auxmap_L1[i].base == 0 && auxmap_L1[i].ent == NULL)
    468             continue;
    469          if (0 != (auxmap_L1[i].base & (Addr)0xFFFF))
    470             return "64-bit: nonzero .base & 0xFFFF in auxmap_L1";
    471          if (auxmap_L1[i].base <= MAX_PRIMARY_ADDRESS)
    472             return "64-bit: .base <= MAX_PRIMARY_ADDRESS in auxmap_L1";
    473          if (auxmap_L1[i].ent == NULL)
    474             return "64-bit: .ent is NULL in auxmap_L1";
    475          if (auxmap_L1[i].ent->base != auxmap_L1[i].base)
    476             return "64-bit: _L1 and _L2 bases are inconsistent";
    477          /* Look it up in auxmap_L2. */
    478          key.base = auxmap_L1[i].base;
    479          key.sm   = 0;
    480          res = VG_(OSetGen_Lookup)(auxmap_L2, &key);
    481          if (res == NULL)
    482             return "64-bit: _L1 .base not found in _L2";
    483          if (res != auxmap_L1[i].ent)
    484             return "64-bit: _L1 .ent disagrees with _L2 entry";
    485       }
    486       /* Check L1 contains no duplicates */
    487       for (i = 0; i < N_AUXMAP_L1; i++) {
    488          if (auxmap_L1[i].base == 0)
    489             continue;
     490          for (j = i+1; j < N_AUXMAP_L1; j++) {
    491             if (auxmap_L1[j].base == 0)
    492                continue;
    493             if (auxmap_L1[j].base == auxmap_L1[i].base)
    494                return "64-bit: duplicate _L1 .base entries";
    495          }
    496       }
    497    }
    498    return NULL; /* ok */
    499 }
    500 
    501 static void insert_into_auxmap_L1_at ( Word rank, AuxMapEnt* ent )
    502 {
    503    Word i;
    504    tl_assert(ent);
    505    tl_assert(rank >= 0 && rank < N_AUXMAP_L1);
    506    for (i = N_AUXMAP_L1-1; i > rank; i--)
    507       auxmap_L1[i] = auxmap_L1[i-1];
    508    auxmap_L1[rank].base = ent->base;
    509    auxmap_L1[rank].ent  = ent;
    510 }
    511 
    512 static INLINE AuxMapEnt* maybe_find_in_auxmap ( Addr a )
    513 {
    514    AuxMapEnt  key;
    515    AuxMapEnt* res;
    516    Word       i;
    517 
    518    tl_assert(a > MAX_PRIMARY_ADDRESS);
    519    a &= ~(Addr)0xFFFF;
    520 
    521    /* First search the front-cache, which is a self-organising
    522       list containing the most popular entries. */
    523 
    524    if (LIKELY(auxmap_L1[0].base == a))
    525       return auxmap_L1[0].ent;
    526    if (LIKELY(auxmap_L1[1].base == a)) {
    527       Addr       t_base = auxmap_L1[0].base;
    528       AuxMapEnt* t_ent  = auxmap_L1[0].ent;
    529       auxmap_L1[0].base = auxmap_L1[1].base;
    530       auxmap_L1[0].ent  = auxmap_L1[1].ent;
    531       auxmap_L1[1].base = t_base;
    532       auxmap_L1[1].ent  = t_ent;
    533       return auxmap_L1[0].ent;
    534    }
    535 
    536    n_auxmap_L1_searches++;
    537 
    538    for (i = 0; i < N_AUXMAP_L1; i++) {
    539       if (auxmap_L1[i].base == a) {
    540          break;
    541       }
    542    }
    543    tl_assert(i >= 0 && i <= N_AUXMAP_L1);
    544 
    545    n_auxmap_L1_cmps += (ULong)(i+1);
    546 
    547    if (i < N_AUXMAP_L1) {
    548       if (i > 0) {
    549          Addr       t_base = auxmap_L1[i-1].base;
    550          AuxMapEnt* t_ent  = auxmap_L1[i-1].ent;
    551          auxmap_L1[i-1].base = auxmap_L1[i-0].base;
    552          auxmap_L1[i-1].ent  = auxmap_L1[i-0].ent;
    553          auxmap_L1[i-0].base = t_base;
    554          auxmap_L1[i-0].ent  = t_ent;
    555          i--;
    556       }
    557       return auxmap_L1[i].ent;
    558    }
    559 
    560    n_auxmap_L2_searches++;
    561 
    562    /* First see if we already have it. */
    563    key.base = a;
    564    key.sm   = 0;
    565 
    566    res = VG_(OSetGen_Lookup)(auxmap_L2, &key);
    567    if (res)
    568       insert_into_auxmap_L1_at( AUXMAP_L1_INSERT_IX, res );
    569    return res;
    570 }
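
/* Illustrative summary (not part of the implementation): the L1 cache
   is kept "self-organising" as follows.  A hit at index 0 costs
   nothing; a hit at index 1 swaps entries 0 and 1; a hit at index i > 1
   swaps entries i-1 and i, so frequently used entries bubble towards
   the front one place per hit.  A miss falls through to the L2 OSet,
   and the entry found there (or freshly allocated, in
   find_or_alloc_in_auxmap below) is inserted at index
   AUXMAP_L1_INSERT_IX (12), pushing the entries from there on down one
   place and dropping the last one. */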
    571 
    572 static AuxMapEnt* find_or_alloc_in_auxmap ( Addr a )
    573 {
    574    AuxMapEnt *nyu, *res;
    575 
    576    /* First see if we already have it. */
    577    res = maybe_find_in_auxmap( a );
    578    if (LIKELY(res))
    579       return res;
    580 
    581    /* Ok, there's no entry in the secondary map, so we'll have
    582       to allocate one. */
    583    a &= ~(Addr)0xFFFF;
    584 
    585    nyu = (AuxMapEnt*) VG_(OSetGen_AllocNode)( auxmap_L2, sizeof(AuxMapEnt) );
    586    nyu->base = a;
    587    nyu->sm   = &sm_distinguished[SM_DIST_NOACCESS];
    588    VG_(OSetGen_Insert)( auxmap_L2, nyu );
    589    insert_into_auxmap_L1_at( AUXMAP_L1_INSERT_IX, nyu );
    590    n_auxmap_L2_nodes++;
    591    return nyu;
    592 }
    593 
    594 /* --------------- SecMap fundamentals --------------- */
    595 
    596 // In all these, 'low' means it's definitely in the main primary map,
    597 // 'high' means it's definitely in the auxiliary table.
    598 
    599 static INLINE SecMap** get_secmap_low_ptr ( Addr a )
    600 {
    601    UWord pm_off = a >> 16;
    602 #  if VG_DEBUG_MEMORY >= 1
    603    tl_assert(pm_off < N_PRIMARY_MAP);
    604 #  endif
    605    return &primary_map[ pm_off ];
    606 }
    607 
    608 static INLINE SecMap** get_secmap_high_ptr ( Addr a )
    609 {
    610    AuxMapEnt* am = find_or_alloc_in_auxmap(a);
    611    return &am->sm;
    612 }
    613 
    614 static INLINE SecMap** get_secmap_ptr ( Addr a )
    615 {
    616    return ( a <= MAX_PRIMARY_ADDRESS
    617           ? get_secmap_low_ptr(a)
    618           : get_secmap_high_ptr(a));
    619 }
    620 
    621 static INLINE SecMap* get_secmap_for_reading_low ( Addr a )
    622 {
    623    return *get_secmap_low_ptr(a);
    624 }
    625 
    626 static INLINE SecMap* get_secmap_for_reading_high ( Addr a )
    627 {
    628    return *get_secmap_high_ptr(a);
    629 }
    630 
    631 static INLINE SecMap* get_secmap_for_writing_low(Addr a)
    632 {
    633    SecMap** p = get_secmap_low_ptr(a);
    634    if (UNLIKELY(is_distinguished_sm(*p)))
    635       *p = copy_for_writing(*p);
    636    return *p;
    637 }
    638 
    639 static INLINE SecMap* get_secmap_for_writing_high ( Addr a )
    640 {
    641    SecMap** p = get_secmap_high_ptr(a);
    642    if (UNLIKELY(is_distinguished_sm(*p)))
    643       *p = copy_for_writing(*p);
    644    return *p;
    645 }
    646 
    647 /* Produce the secmap for 'a', either from the primary map or by
    648    ensuring there is an entry for it in the aux primary map.  The
    649    secmap may be a distinguished one as the caller will only want to
    650    be able to read it.
    651 */
    652 static INLINE SecMap* get_secmap_for_reading ( Addr a )
    653 {
    654    return ( a <= MAX_PRIMARY_ADDRESS
    655           ? get_secmap_for_reading_low (a)
    656           : get_secmap_for_reading_high(a) );
    657 }
    658 
    659 /* Produce the secmap for 'a', either from the primary map or by
    660    ensuring there is an entry for it in the aux primary map.  The
    661    secmap may not be a distinguished one, since the caller will want
    662    to be able to write it.  If it is a distinguished secondary, make a
    663    writable copy of it, install it, and return the copy instead.  (COW
    664    semantics).
    665 */
    666 static INLINE SecMap* get_secmap_for_writing ( Addr a )
    667 {
    668    return ( a <= MAX_PRIMARY_ADDRESS
    669           ? get_secmap_for_writing_low (a)
    670           : get_secmap_for_writing_high(a) );
    671 }
    672 
    673 /* If 'a' has a SecMap, produce it.  Else produce NULL.  But don't
    674    allocate one if one doesn't already exist.  This is used by the
    675    leak checker.
    676 */
    677 static SecMap* maybe_get_secmap_for ( Addr a )
    678 {
    679    if (a <= MAX_PRIMARY_ADDRESS) {
    680       return get_secmap_for_reading_low(a);
    681    } else {
    682       AuxMapEnt* am = maybe_find_in_auxmap(a);
    683       return am ? am->sm : NULL;
    684    }
    685 }
    686 
    687 /* --------------- Fundamental functions --------------- */
    688 
    689 static INLINE
    690 void insert_vabits2_into_vabits8 ( Addr a, UChar vabits2, UChar* vabits8 )
    691 {
    692    UInt shift =  (a & 3)  << 1;        // shift by 0, 2, 4, or 6
    693    *vabits8  &= ~(0x3     << shift);   // mask out the two old bits
    694    *vabits8  |=  (vabits2 << shift);   // mask  in the two new bits
    695 }
    696 
    697 static INLINE
    698 void insert_vabits4_into_vabits8 ( Addr a, UChar vabits4, UChar* vabits8 )
    699 {
    700    UInt shift;
    701    tl_assert(VG_IS_2_ALIGNED(a));      // Must be 2-aligned
    702    shift     =  (a & 2)   << 1;        // shift by 0 or 4
    703    *vabits8 &= ~(0xf      << shift);   // mask out the four old bits
    704    *vabits8 |=  (vabits4 << shift);    // mask  in the four new bits
    705 }
    706 
    707 static INLINE
    708 UChar extract_vabits2_from_vabits8 ( Addr a, UChar vabits8 )
    709 {
    710    UInt shift = (a & 3) << 1;          // shift by 0, 2, 4, or 6
    711    vabits8 >>= shift;                  // shift the two bits to the bottom
    712    return 0x3 & vabits8;               // mask out the rest
    713 }
    714 
    715 static INLINE
    716 UChar extract_vabits4_from_vabits8 ( Addr a, UChar vabits8 )
    717 {
    718    UInt shift;
    719    tl_assert(VG_IS_2_ALIGNED(a));      // Must be 2-aligned
    720    shift = (a & 2) << 1;               // shift by 0 or 4
    721    vabits8 >>= shift;                  // shift the four bits to the bottom
    722    return 0xf & vabits8;               // mask out the rest
    723 }
    724 
    725 // Note that these four are only used in slow cases.  The fast cases do
    726 // clever things like combine the auxmap check (in
     727 // get_secmap_for_{reading,writing}) with alignment checks.
    728 
    729 // *** WARNING! ***
    730 // Any time this function is called, if it is possible that vabits2
    731 // is equal to VA_BITS2_PARTDEFINED, then the corresponding entry in the
    732 // sec-V-bits table must also be set!
    733 static INLINE
    734 void set_vabits2 ( Addr a, UChar vabits2 )
    735 {
    736    SecMap* sm       = get_secmap_for_writing(a);
    737    UWord   sm_off   = SM_OFF(a);
    738    insert_vabits2_into_vabits8( a, vabits2, &(sm->vabits8[sm_off]) );
    739 }
    740 
    741 static INLINE
    742 UChar get_vabits2 ( Addr a )
    743 {
    744    SecMap* sm       = get_secmap_for_reading(a);
    745    UWord   sm_off   = SM_OFF(a);
    746    UChar   vabits8  = sm->vabits8[sm_off];
    747    return extract_vabits2_from_vabits8(a, vabits8);
    748 }
    749 
    750 // *** WARNING! ***
    751 // Any time this function is called, if it is possible that any of the
    752 // 4 2-bit fields in vabits8 are equal to VA_BITS2_PARTDEFINED, then the
     753 // corresponding entries in the sec-V-bits table must also be set!
    754 static INLINE
    755 UChar get_vabits8_for_aligned_word32 ( Addr a )
    756 {
    757    SecMap* sm       = get_secmap_for_reading(a);
    758    UWord   sm_off   = SM_OFF(a);
    759    UChar   vabits8  = sm->vabits8[sm_off];
    760    return vabits8;
    761 }
    762 
    763 static INLINE
    764 void set_vabits8_for_aligned_word32 ( Addr a, UChar vabits8 )
    765 {
    766    SecMap* sm       = get_secmap_for_writing(a);
    767    UWord   sm_off   = SM_OFF(a);
    768    sm->vabits8[sm_off] = vabits8;
    769 }
    770 
    771 
    772 // Forward declarations
    773 static UWord get_sec_vbits8(Addr a);
    774 static void  set_sec_vbits8(Addr a, UWord vbits8);
    775 
    776 // Returns False if there was an addressability error.
    777 static INLINE
    778 Bool set_vbits8 ( Addr a, UChar vbits8 )
    779 {
    780    Bool  ok      = True;
    781    UChar vabits2 = get_vabits2(a);
    782    if ( VA_BITS2_NOACCESS != vabits2 ) {
    783       // Addressable.  Convert in-register format to in-memory format.
    784       // Also remove any existing sec V bit entry for the byte if no
    785       // longer necessary.
    786       if      ( V_BITS8_DEFINED   == vbits8 ) { vabits2 = VA_BITS2_DEFINED;   }
    787       else if ( V_BITS8_UNDEFINED == vbits8 ) { vabits2 = VA_BITS2_UNDEFINED; }
    788       else                                    { vabits2 = VA_BITS2_PARTDEFINED;
    789                                                 set_sec_vbits8(a, vbits8);  }
    790       set_vabits2(a, vabits2);
    791 
    792    } else {
    793       // Unaddressable!  Do nothing -- when writing to unaddressable
    794       // memory it acts as a black hole, and the V bits can never be seen
    795       // again.  So we don't have to write them at all.
    796       ok = False;
    797    }
    798    return ok;
    799 }
    800 
    801 // Returns False if there was an addressability error.  In that case, we put
    802 // all defined bits into vbits8.
    803 static INLINE
    804 Bool get_vbits8 ( Addr a, UChar* vbits8 )
    805 {
    806    Bool  ok      = True;
    807    UChar vabits2 = get_vabits2(a);
    808 
    809    // Convert the in-memory format to in-register format.
    810    if      ( VA_BITS2_DEFINED   == vabits2 ) { *vbits8 = V_BITS8_DEFINED;   }
    811    else if ( VA_BITS2_UNDEFINED == vabits2 ) { *vbits8 = V_BITS8_UNDEFINED; }
    812    else if ( VA_BITS2_NOACCESS  == vabits2 ) {
    813       *vbits8 = V_BITS8_DEFINED;    // Make V bits defined!
    814       ok = False;
    815    } else {
    816       tl_assert( VA_BITS2_PARTDEFINED == vabits2 );
    817       *vbits8 = get_sec_vbits8(a);
    818    }
    819    return ok;
    820 }
    821 
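/* Illustrative sketch (not Memcheck's actual code): how one byte's
   shadow state could be copied from 'src' to 'dst' using the two
   helpers above.  The name 'copy_one_byte_vbits' and the error
   handling are hypothetical.

      static void copy_one_byte_vbits ( Addr src, Addr dst )
      {
         UChar vbits8;
         Bool  src_ok = get_vbits8(src, &vbits8); // False if src unaddressable
         Bool  dst_ok = set_vbits8(dst, vbits8);  // False if dst unaddressable
         if (!src_ok || !dst_ok) {
            // a real caller would report an addressability error here
         }
      }

   Note that get_vbits8 deliberately yields V_BITS8_DEFINED for an
   unaddressable source, so such a copy never introduces spurious
   undefinedness. */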
    822 
    823 /* --------------- Secondary V bit table ------------ */
    824 
    825 // This table holds the full V bit pattern for partially-defined bytes
    826 // (PDBs) that are represented by VA_BITS2_PARTDEFINED in the main shadow
    827 // memory.
    828 //
    829 // Note: the nodes in this table can become stale.  Eg. if you write a PDB,
    830 // then overwrite the same address with a fully defined byte, the sec-V-bit
    831 // node will not necessarily be removed.  This is because checking for
    832 // whether removal is necessary would slow down the fast paths.
    833 //
    834 // To avoid the stale nodes building up too much, we periodically (once the
    835 // table reaches a certain size) garbage collect (GC) the table by
     836 // traversing it and evicting any nodes that no longer cover any PDBs.
     837 // If more than a certain proportion of nodes survive, we increase the
    838 // table size so that GCs occur less often.
    839 //
    840 // This policy is designed to avoid bad table bloat in the worst case where
    841 // a program creates huge numbers of stale PDBs -- we would get this bloat
    842 // if we had no GC -- while handling well the case where a node becomes
    843 // stale but shortly afterwards is rewritten with a PDB and so becomes
    844 // non-stale again (which happens quite often, eg. in perf/bz2).  If we just
    845 // remove all stale nodes as soon as possible, we just end up re-adding a
    846 // lot of them in later again.  The "sufficiently stale" approach avoids
    847 // this.  (If a program has many live PDBs, performance will just suck,
    848 // there's no way around that.)
    849 //
    850 // Further comments, JRS 14 Feb 2012.  It turns out that the policy of
    851 // holding on to stale entries for 2 GCs before discarding them can lead
    852 // to massive space leaks.  So we're changing to an arrangement where
    853 // lines are evicted as soon as they are observed to be stale during a
    854 // GC.  This also has a side benefit of allowing the sufficiently_stale
    855 // field to be removed from the SecVBitNode struct, reducing its size by
    856 // 8 bytes, which is a substantial space saving considering that the
    857 // struct was previously 32 or so bytes, on a 64 bit target.
    858 //
    859 // In order to try and mitigate the problem that the "sufficiently stale"
    860 // heuristic was designed to avoid, the table size is allowed to drift
    861 // up ("DRIFTUP") slowly to 80000, even if the residency is low.  This
    862 // means that nodes will exist in the table longer on average, and hopefully
    863 // will be deleted and re-added less frequently.
    864 //
    865 // The previous scaling up mechanism (now called STEPUP) is retained:
    866 // if residency exceeds 50%, the table is scaled up, although by a
    867 // factor sqrt(2) rather than 2 as before.  This effectively doubles the
     868 // frequency of GCs when there are many PDBs and reduces the tendency of
    869 // stale PDBs to reside for long periods in the table.
    870 
    871 static OSet* secVBitTable;
    872 
    873 // Stats
    874 static ULong sec_vbits_new_nodes = 0;
    875 static ULong sec_vbits_updates   = 0;
    876 
    877 // This must be a power of two;  this is checked in mc_pre_clo_init().
    878 // The size chosen here is a trade-off:  if the nodes are bigger (ie. cover
    879 // a larger address range) they take more space but we can get multiple
    880 // partially-defined bytes in one if they are close to each other, reducing
    881 // the number of total nodes.  In practice sometimes they are clustered (eg.
    882 // perf/bz2 repeatedly writes then reads more than 20,000 in a contiguous
    883 // row), but often not.  So we choose something intermediate.
    884 #define BYTES_PER_SEC_VBIT_NODE     16
    885 
    886 // We make the table bigger by a factor of STEPUP_GROWTH_FACTOR if
    887 // more than this many nodes survive a GC.
    888 #define STEPUP_SURVIVOR_PROPORTION  0.5
    889 #define STEPUP_GROWTH_FACTOR        1.414213562
    890 
    891 // If the above heuristic doesn't apply, then we may make the table
    892 // slightly bigger, by a factor of DRIFTUP_GROWTH_FACTOR, if more than
    893 // this many nodes survive a GC, _and_ the total table size does
    894 // not exceed a fixed limit.  The numbers are somewhat arbitrary, but
    895 // work tolerably well on long Firefox runs.  The scaleup ratio of 1.5%
     896 // slowly but effectively reduces residency and increases the time
    897 // between GCs for programs with small numbers of PDBs.  The 80000 limit
    898 // effectively limits the table size to around 2MB for programs with
    899 // small numbers of PDBs, whilst giving a reasonably long lifetime to
    900 // entries, to try and reduce the costs resulting from deleting and
    901 // re-adding of entries.
    902 #define DRIFTUP_SURVIVOR_PROPORTION 0.15
    903 #define DRIFTUP_GROWTH_FACTOR       1.015
    904 #define DRIFTUP_MAX_SIZE            80000
    905 
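// Worked numbers (illustrative only): with the initial secVBitLimit of
// 1000 (below), a GC after which more than 500 nodes survive (50%)
// STEPs the limit Up to 1000 * 1.414 = 1414; otherwise, if more than
// 150 nodes survive (15%) and the limit is still below 80000, it
// DRIFTs Up to 1000 * 1.015 = 1015.
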
    906 // We GC the table when it gets this many nodes in it, ie. it's effectively
    907 // the table size.  It can change.
    908 static Int  secVBitLimit = 1000;
    909 
    910 // The number of GCs done, used to age sec-V-bit nodes for eviction.
    911 // Because it's unsigned, wrapping doesn't matter -- the right answer will
    912 // come out anyway.
    913 static UInt GCs_done = 0;
    914 
    915 typedef
    916    struct {
    917       Addr  a;
    918       UChar vbits8[BYTES_PER_SEC_VBIT_NODE];
    919    }
    920    SecVBitNode;
    921 
    922 static OSet* createSecVBitTable(void)
    923 {
    924    OSet* newSecVBitTable;
    925    newSecVBitTable = VG_(OSetGen_Create_With_Pool)
    926       ( offsetof(SecVBitNode, a),
    927         NULL, // use fast comparisons
    928         VG_(malloc), "mc.cSVT.1 (sec VBit table)",
    929         VG_(free),
    930         1000,
    931         sizeof(SecVBitNode));
    932    return newSecVBitTable;
    933 }
    934 
    935 static void gcSecVBitTable(void)
    936 {
    937    OSet*        secVBitTable2;
    938    SecVBitNode* n;
    939    Int          i, n_nodes = 0, n_survivors = 0;
    940 
    941    GCs_done++;
    942 
    943    // Create the new table.
    944    secVBitTable2 = createSecVBitTable();
    945 
    946    // Traverse the table, moving fresh nodes into the new table.
    947    VG_(OSetGen_ResetIter)(secVBitTable);
    948    while ( (n = VG_(OSetGen_Next)(secVBitTable)) ) {
    949       // Keep node if any of its bytes are non-stale.  Using
    950       // get_vabits2() for the lookup is not very efficient, but I don't
    951       // think it matters.
    952       for (i = 0; i < BYTES_PER_SEC_VBIT_NODE; i++) {
    953          if (VA_BITS2_PARTDEFINED == get_vabits2(n->a + i)) {
    954             // Found a non-stale byte, so keep =>
    955             // Insert a copy of the node into the new table.
    956             SecVBitNode* n2 =
    957                VG_(OSetGen_AllocNode)(secVBitTable2, sizeof(SecVBitNode));
    958             *n2 = *n;
    959             VG_(OSetGen_Insert)(secVBitTable2, n2);
    960             break;
    961          }
    962       }
    963    }
    964 
    965    // Get the before and after sizes.
    966    n_nodes     = VG_(OSetGen_Size)(secVBitTable);
    967    n_survivors = VG_(OSetGen_Size)(secVBitTable2);
    968 
    969    // Destroy the old table, and put the new one in its place.
    970    VG_(OSetGen_Destroy)(secVBitTable);
    971    secVBitTable = secVBitTable2;
    972 
    973    if (VG_(clo_verbosity) > 1 && n_nodes != 0) {
    974       VG_(message)(Vg_DebugMsg, "memcheck GC: %d nodes, %d survivors (%.1f%%)\n",
    975                    n_nodes, n_survivors, n_survivors * 100.0 / n_nodes);
    976    }
    977 
    978    // Increase table size if necessary.
    979    if ((Double)n_survivors
    980        > ((Double)secVBitLimit * STEPUP_SURVIVOR_PROPORTION)) {
    981       secVBitLimit = (Int)((Double)secVBitLimit * (Double)STEPUP_GROWTH_FACTOR);
    982       if (VG_(clo_verbosity) > 1)
    983          VG_(message)(Vg_DebugMsg,
    984                       "memcheck GC: %d new table size (stepup)\n",
    985                       secVBitLimit);
    986    }
    987    else
    988    if (secVBitLimit < DRIFTUP_MAX_SIZE
    989        && (Double)n_survivors
    990           > ((Double)secVBitLimit * DRIFTUP_SURVIVOR_PROPORTION)) {
    991       secVBitLimit = (Int)((Double)secVBitLimit * (Double)DRIFTUP_GROWTH_FACTOR);
    992       if (VG_(clo_verbosity) > 1)
    993          VG_(message)(Vg_DebugMsg,
    994                       "memcheck GC: %d new table size (driftup)\n",
    995                       secVBitLimit);
    996    }
    997 }
    998 
    999 static UWord get_sec_vbits8(Addr a)
   1000 {
   1001    Addr         aAligned = VG_ROUNDDN(a, BYTES_PER_SEC_VBIT_NODE);
   1002    Int          amod     = a % BYTES_PER_SEC_VBIT_NODE;
   1003    SecVBitNode* n        = VG_(OSetGen_Lookup)(secVBitTable, &aAligned);
   1004    UChar        vbits8;
   1005    tl_assert2(n, "get_sec_vbits8: no node for address %p (%p)\n", aAligned, a);
   1006    // Shouldn't be fully defined or fully undefined -- those cases shouldn't
   1007    // make it to the secondary V bits table.
   1008    vbits8 = n->vbits8[amod];
   1009    tl_assert(V_BITS8_DEFINED != vbits8 && V_BITS8_UNDEFINED != vbits8);
   1010    return vbits8;
   1011 }
   1012 
   1013 static void set_sec_vbits8(Addr a, UWord vbits8)
   1014 {
   1015    Addr         aAligned = VG_ROUNDDN(a, BYTES_PER_SEC_VBIT_NODE);
   1016    Int          i, amod  = a % BYTES_PER_SEC_VBIT_NODE;
   1017    SecVBitNode* n        = VG_(OSetGen_Lookup)(secVBitTable, &aAligned);
   1018    // Shouldn't be fully defined or fully undefined -- those cases shouldn't
   1019    // make it to the secondary V bits table.
   1020    tl_assert(V_BITS8_DEFINED != vbits8 && V_BITS8_UNDEFINED != vbits8);
   1021    if (n) {
   1022       n->vbits8[amod] = vbits8;     // update
   1023       sec_vbits_updates++;
   1024    } else {
   1025       // Do a table GC if necessary.  Nb: do this before creating and
   1026       // inserting the new node, to avoid erroneously GC'ing the new node.
   1027       if (secVBitLimit == VG_(OSetGen_Size)(secVBitTable)) {
   1028          gcSecVBitTable();
   1029       }
   1030 
   1031       // New node:  assign the specific byte, make the rest invalid (they
   1032       // should never be read as-is, but be cautious).
   1033       n = VG_(OSetGen_AllocNode)(secVBitTable, sizeof(SecVBitNode));
   1034       n->a            = aAligned;
   1035       for (i = 0; i < BYTES_PER_SEC_VBIT_NODE; i++) {
   1036          n->vbits8[i] = V_BITS8_UNDEFINED;
   1037       }
   1038       n->vbits8[amod] = vbits8;
   1039 
   1040       // Insert the new node.
   1041       VG_(OSetGen_Insert)(secVBitTable, n);
   1042       sec_vbits_new_nodes++;
   1043 
   1044       n_secVBit_nodes = VG_(OSetGen_Size)(secVBitTable);
   1045       if (n_secVBit_nodes > max_secVBit_nodes)
   1046          max_secVBit_nodes = n_secVBit_nodes;
   1047    }
   1048 }
   1049 
   1050 /* --------------- Endianness helpers --------------- */
   1051 
    1052 /* Returns the offset in memory of the byteno-th least significant byte
   1053    in a wordszB-sized word, given the specified endianness. */
   1054 static INLINE UWord byte_offset_w ( UWord wordszB, Bool bigendian,
   1055                                     UWord byteno ) {
   1056    return bigendian ? (wordszB-1-byteno) : byteno;
   1057 }
   1058 
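/* Example (illustrative only): for a 4-byte word,
   byte_offset_w(4, False, 0) == 0 (little-endian) and
   byte_offset_w(4, True, 0) == 3 (big-endian) -- in both cases the
   memory offset of the word's least significant byte. */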
   1059 
   1060 /* --------------- Ignored address ranges --------------- */
   1061 
   1062 /* Denotes the address-error-reportability status for address ranges:
   1063    IAR_NotIgnored:  the usual case -- report errors in this range
   1064    IAR_CommandLine: don't report errors -- from command line setting
   1065    IAR_ClientReq:   don't report errors -- from client request
   1066 */
   1067 typedef
   1068    enum { IAR_INVALID=99,
   1069           IAR_NotIgnored,
   1070           IAR_CommandLine,
   1071           IAR_ClientReq }
   1072    IARKind;
   1073 
   1074 static const HChar* showIARKind ( IARKind iark )
   1075 {
   1076    switch (iark) {
   1077       case IAR_INVALID:     return "INVALID";
   1078       case IAR_NotIgnored:  return "NotIgnored";
   1079       case IAR_CommandLine: return "CommandLine";
   1080       case IAR_ClientReq:   return "ClientReq";
   1081       default:              return "???";
   1082    }
   1083 }
   1084 
   1085 // RangeMap<IARKind>
   1086 static RangeMap* gIgnoredAddressRanges = NULL;
   1087 
   1088 static void init_gIgnoredAddressRanges ( void )
   1089 {
   1090    if (LIKELY(gIgnoredAddressRanges != NULL))
   1091       return;
   1092    gIgnoredAddressRanges = VG_(newRangeMap)( VG_(malloc), "mc.igIAR.1",
   1093                                              VG_(free), IAR_NotIgnored );
   1094 }
   1095 
   1096 Bool MC_(in_ignored_range) ( Addr a )
   1097 {
   1098    if (LIKELY(gIgnoredAddressRanges == NULL))
   1099       return False;
   1100    UWord how     = IAR_INVALID;
   1101    UWord key_min = ~(UWord)0;
   1102    UWord key_max =  (UWord)0;
   1103    VG_(lookupRangeMap)(&key_min, &key_max, &how, gIgnoredAddressRanges, a);
   1104    tl_assert(key_min <= a && a <= key_max);
   1105    switch (how) {
   1106       case IAR_NotIgnored:  return False;
   1107       case IAR_CommandLine: return True;
   1108       case IAR_ClientReq:   return True;
   1109       default: break; /* invalid */
   1110    }
    1111    VG_(tool_panic)("MC_(in_ignored_range)");
   1112    /*NOTREACHED*/
   1113 }
   1114 
   1115 /* Parse two Addr separated by a dash, or fail. */
   1116 
   1117 static Bool parse_range ( const HChar** ppc, Addr* result1, Addr* result2 )
   1118 {
   1119    Bool ok = VG_(parse_Addr) (ppc, result1);
   1120    if (!ok)
   1121       return False;
   1122    if (**ppc != '-')
   1123       return False;
   1124    (*ppc)++;
   1125    ok = VG_(parse_Addr) (ppc, result2);
   1126    if (!ok)
   1127       return False;
   1128    return True;
   1129 }
   1130 
    1131 /* Parse a set of ranges separated by commas, or
   1132    fail.  If they are valid, add them to the global set of ignored
   1133    ranges. */
   1134 static Bool parse_ignore_ranges ( const HChar* str0 )
   1135 {
   1136    init_gIgnoredAddressRanges();
   1137    const HChar*  str = str0;
   1138    const HChar** ppc = &str;
   1139    while (1) {
   1140       Addr start = ~(Addr)0;
   1141       Addr end   = (Addr)0;
   1142       Bool ok    = parse_range(ppc, &start, &end);
   1143       if (!ok)
   1144          return False;
   1145       if (start > end)
   1146          return False;
   1147       VG_(bindRangeMap)( gIgnoredAddressRanges, start, end, IAR_CommandLine );
   1148       if (**ppc == 0)
   1149          return True;
   1150       if (**ppc != ',')
   1151          return False;
   1152       (*ppc)++;
   1153    }
   1154    /*NOTREACHED*/
   1155    return False;
   1156 }
   1157 
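/* Example (illustrative only; the addresses are made up): the string
   accepted by parse_ignore_ranges is a comma-separated list of
   dash-separated address pairs, e.g.

      "0x11000000-0x11ffffff,0x22000000-0x22ffffff"

   which binds both ranges to IAR_CommandLine in
   gIgnoredAddressRanges. */
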
    1158 /* Add or remove [start, start+len) from the set of ignored ranges. */
   1159 static Bool modify_ignore_ranges ( Bool addRange, Addr start, Addr len )
   1160 {
   1161    init_gIgnoredAddressRanges();
   1162    const Bool verbose = (VG_(clo_verbosity) > 1);
   1163    if (len == 0) {
   1164       return False;
   1165    }
   1166    if (addRange) {
   1167       VG_(bindRangeMap)(gIgnoredAddressRanges,
   1168                         start, start+len-1, IAR_ClientReq);
   1169       if (verbose)
   1170          VG_(dmsg)("memcheck: modify_ignore_ranges: add %p %p\n",
   1171                    (void*)start, (void*)(start+len-1));
   1172    } else {
   1173       VG_(bindRangeMap)(gIgnoredAddressRanges,
   1174                         start, start+len-1, IAR_NotIgnored);
   1175       if (verbose)
   1176          VG_(dmsg)("memcheck: modify_ignore_ranges: del %p %p\n",
   1177                    (void*)start, (void*)(start+len-1));
   1178    }
   1179    if (verbose) {
   1180       VG_(dmsg)("memcheck:   now have %u ranges:\n",
   1181                 VG_(sizeRangeMap)(gIgnoredAddressRanges));
   1182       UInt i;
   1183       for (i = 0; i < VG_(sizeRangeMap)(gIgnoredAddressRanges); i++) {
   1184          UWord val     = IAR_INVALID;
   1185          UWord key_min = ~(UWord)0;
   1186          UWord key_max = (UWord)0;
   1187          VG_(indexRangeMap)( &key_min, &key_max, &val,
   1188                              gIgnoredAddressRanges, i );
   1189          VG_(dmsg)("memcheck:      [%u]  %016lx-%016lx  %s\n",
   1190                    i, key_min, key_max, showIARKind(val));
   1191       }
   1192    }
   1193    return True;
   1194 }
   1195 
   1196 
   1197 /* --------------- Load/store slow cases. --------------- */
   1198 
   1199 static
   1200 __attribute__((noinline))
   1201 void mc_LOADV_128_or_256_slow ( /*OUT*/ULong* res,
   1202                                 Addr a, SizeT nBits, Bool bigendian )
   1203 {
   1204    ULong  pessim[4];     /* only used when p-l-ok=yes */
   1205    SSizeT szB            = nBits / 8;
   1206    SSizeT szL            = szB / 8;  /* Size in Longs (64-bit units) */
   1207    SSizeT i, j;          /* Must be signed. */
   1208    SizeT  n_addrs_bad = 0;
   1209    Addr   ai;
   1210    UChar  vbits8;
   1211    Bool   ok;
   1212 
   1213    /* Code below assumes load size is a power of two and at least 64
   1214       bits. */
   1215    tl_assert((szB & (szB-1)) == 0 && szL > 0);
   1216 
   1217    /* If this triggers, you probably just need to increase the size of
   1218       the pessim array. */
   1219    tl_assert(szL <= sizeof(pessim) / sizeof(pessim[0]));
   1220 
   1221    for (j = 0; j < szL; j++) {
   1222       pessim[j] = V_BITS64_DEFINED;
   1223       res[j] = V_BITS64_UNDEFINED;
   1224    }
   1225 
   1226    /* Make up a result V word, which contains the loaded data for
   1227       valid addresses and Defined for invalid addresses.  Iterate over
   1228       the bytes in the word, from the most significant down to the
    1229       least.  The vbits to return are calculated into res[].  Also
   1230       compute the pessimising value to be used when
   1231       --partial-loads-ok=yes.  n_addrs_bad is redundant (the relevant
   1232       info can be gleaned from the pessim array) but is used as a
   1233       cross-check. */
   1234    for (j = szL-1; j >= 0; j--) {
   1235       ULong vbits64    = V_BITS64_UNDEFINED;
   1236       ULong pessim64   = V_BITS64_DEFINED;
   1237       UWord long_index = byte_offset_w(szL, bigendian, j);
   1238       for (i = 8-1; i >= 0; i--) {
   1239          PROF_EVENT(MCPE_LOADV_128_OR_256_SLOW_LOOP);
   1240          ai = a + 8*long_index + byte_offset_w(8, bigendian, i);
   1241          ok = get_vbits8(ai, &vbits8);
   1242          vbits64 <<= 8;
   1243          vbits64 |= vbits8;
   1244          if (!ok) n_addrs_bad++;
   1245          pessim64 <<= 8;
   1246          pessim64 |= (ok ? V_BITS8_DEFINED : V_BITS8_UNDEFINED);
   1247       }
   1248       res[long_index] = vbits64;
   1249       pessim[long_index] = pessim64;
   1250    }
   1251 
   1252    /* In the common case, all the addresses involved are valid, so we
   1253       just return the computed V bits and have done. */
   1254    if (LIKELY(n_addrs_bad == 0))
   1255       return;
   1256 
   1257    /* If there's no possibility of getting a partial-loads-ok
   1258       exemption, report the error and quit. */
   1259    if (!MC_(clo_partial_loads_ok)) {
   1260       MC_(record_address_error)( VG_(get_running_tid)(), a, szB, False );
   1261       return;
   1262    }
   1263 
    1264    /* The partial-loads-ok exemption might apply.  Find out if it
   1265       does.  If so, don't report an addressing error, but do return
   1266       Undefined for the bytes that are out of range, so as to avoid
   1267       false negatives.  If it doesn't apply, just report an addressing
   1268       error in the usual way. */
   1269 
   1270    /* Some code steps along byte strings in aligned chunks
   1271       even when there is only a partially defined word at the end (eg,
   1272       optimised strlen).  This is allowed by the memory model of
   1273       modern machines, since an aligned load cannot span two pages and
   1274       thus cannot "partially fault".
   1275 
    1276       Therefore, a load from a partially-addressable place is allowed
   1277       if all of the following hold:
   1278       - the command-line flag is set [by default, it isn't]
   1279       - it's an aligned load
   1280       - at least one of the addresses in the word *is* valid
   1281 
   1282       Since this suppresses the addressing error, we avoid false
   1283       negatives by marking bytes undefined when they come from an
   1284       invalid address.
   1285    */
   1286 
   1287    /* "at least one of the addresses is invalid" */
   1288    ok = False;
   1289    for (j = 0; j < szL; j++)
   1290       ok |= pessim[j] != V_BITS64_DEFINED;
   1291    tl_assert(ok);
   1292 
   1293    if (0 == (a & (szB - 1)) && n_addrs_bad < szB) {
   1294       /* Exemption applies.  Use the previously computed pessimising
   1295          value and return the combined result, but don't flag an
   1296          addressing error.  The pessimising value is Defined for valid
   1297          addresses and Undefined for invalid addresses. */
   1298       /* for assumption that doing bitwise or implements UifU */
   1299       tl_assert(V_BIT_UNDEFINED == 1 && V_BIT_DEFINED == 0);
   1300       /* (really need "UifU" here...)
   1301          vbits[j] UifU= pessim[j]  (is pessimised by it, iow) */
   1302       for (j = szL-1; j >= 0; j--)
   1303          res[j] |= pessim[j];
   1304       return;
   1305    }
   1306 
   1307    /* Exemption doesn't apply.  Flag an addressing error in the normal
   1308       way. */
   1309    MC_(record_address_error)( VG_(get_running_tid)(), a, szB, False );
   1310 }
   1311 
   1312 
   1313 static
   1314 __attribute__((noinline))
   1315 __attribute__((used))
   1316 VG_REGPARM(3) /* make sure we're using a fixed calling convention, since
   1317                  this function may get called from hand written assembly. */
   1318 ULong mc_LOADVn_slow ( Addr a, SizeT nBits, Bool bigendian )
   1319 {
   1320    PROF_EVENT(MCPE_LOADVN_SLOW);
   1321 
   1322    /* ------------ BEGIN semi-fast cases ------------ */
   1323    /* These deal quickly-ish with the common auxiliary primary map
   1324       cases on 64-bit platforms.  They are merely a speedup hack and can be
   1325       omitted without loss of correctness/functionality.  Note that in
   1326       both cases the "sizeof(void*) == 8" causes these cases to be
   1327       folded out by compilers on 32-bit platforms.  These are derived
   1328       from LOADV64 and LOADV32.
   1329    */
   1330    if (LIKELY(sizeof(void*) == 8
   1331                       && nBits == 64 && VG_IS_8_ALIGNED(a))) {
   1332       SecMap* sm       = get_secmap_for_reading(a);
   1333       UWord   sm_off16 = SM_OFF_16(a);
   1334       UWord   vabits16 = ((UShort*)(sm->vabits8))[sm_off16];
   1335       if (LIKELY(vabits16 == VA_BITS16_DEFINED))
   1336          return V_BITS64_DEFINED;
   1337       if (LIKELY(vabits16 == VA_BITS16_UNDEFINED))
   1338          return V_BITS64_UNDEFINED;
   1339       /* else fall into the slow case */
   1340    }
   1341    if (LIKELY(sizeof(void*) == 8
   1342                       && nBits == 32 && VG_IS_4_ALIGNED(a))) {
   1343       SecMap* sm = get_secmap_for_reading(a);
   1344       UWord sm_off = SM_OFF(a);
   1345       UWord vabits8 = sm->vabits8[sm_off];
   1346       if (LIKELY(vabits8 == VA_BITS8_DEFINED))
   1347          return ((UWord)0xFFFFFFFF00000000ULL | (UWord)V_BITS32_DEFINED);
   1348       if (LIKELY(vabits8 == VA_BITS8_UNDEFINED))
   1349          return ((UWord)0xFFFFFFFF00000000ULL | (UWord)V_BITS32_UNDEFINED);
   1350       /* else fall into slow case */
   1351    }
   1352    /* ------------ END semi-fast cases ------------ */
   1353 
   1354    ULong  vbits64     = V_BITS64_UNDEFINED; /* result */
   1355    ULong  pessim64    = V_BITS64_DEFINED;   /* only used when p-l-ok=yes */
   1356    SSizeT szB         = nBits / 8;
   1357    SSizeT i;          /* Must be signed. */
   1358    SizeT  n_addrs_bad = 0;
   1359    Addr   ai;
   1360    UChar  vbits8;
   1361    Bool   ok;
   1362 
   1363    tl_assert(nBits == 64 || nBits == 32 || nBits == 16 || nBits == 8);
   1364 
   1365    /* Make up a 64-bit result V word, which contains the loaded data
   1366       for valid addresses and Defined for invalid addresses.  Iterate
   1367       over the bytes in the word, from the most significant down to
   1368       the least.  The vbits to return are calculated into vbits64.
   1369       Also compute the pessimising value to be used when
   1370       --partial-loads-ok=yes.  n_addrs_bad is redundant (the relevant
   1371       info can be gleaned from pessim64) but is used as a
   1372       cross-check. */
   1373    for (i = szB-1; i >= 0; i--) {
   1374       PROF_EVENT(MCPE_LOADVN_SLOW_LOOP);
   1375       ai = a + byte_offset_w(szB, bigendian, i);
   1376       ok = get_vbits8(ai, &vbits8);
   1377       vbits64 <<= 8;
   1378       vbits64 |= vbits8;
   1379       if (!ok) n_addrs_bad++;
   1380       pessim64 <<= 8;
   1381       pessim64 |= (ok ? V_BITS8_DEFINED : V_BITS8_UNDEFINED);
   1382    }
   1383 
   1384    /* In the common case, all the addresses involved are valid, so we
   1385       just return the computed V bits and have done. */
   1386    if (LIKELY(n_addrs_bad == 0))
   1387       return vbits64;
   1388 
   1389    /* If there's no possibility of getting a partial-loads-ok
   1390       exemption, report the error and quit. */
   1391    if (!MC_(clo_partial_loads_ok)) {
   1392       MC_(record_address_error)( VG_(get_running_tid)(), a, szB, False );
   1393       return vbits64;
   1394    }
   1395 
   1396    /* The partial-loads-ok exemption might apply.  Find out if it
   1397       does.  If so, don't report an addressing error, but do return
   1398       Undefined for the bytes that are out of range, so as to avoid
   1399       false negatives.  If it doesn't apply, just report an addressing
   1400       error in the usual way. */
   1401 
   1402    /* Some code steps along byte strings in aligned word-sized chunks
   1403       even when there is only a partially defined word at the end (eg,
   1404       optimised strlen).  This is allowed by the memory model of
   1405       modern machines, since an aligned load cannot span two pages and
   1406       thus cannot "partially fault", even though such behaviour is
   1407       declared undefined by ANSI C/C++.
   1408 
   1409       Therefore, a load from a partially-addressible place is allowed
   1410       if all of the following hold:
   1411       - the command-line flag is set [by default, it isn't]
   1412       - it's a word-sized, word-aligned load
   1413       - at least one of the addresses in the word *is* valid
   1414 
   1415       Since this suppresses the addressing error, we avoid false
   1416       negatives by marking bytes undefined when they come from an
   1417       invalid address.
   1418    */
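
           /* Worked example (illustrative only; assumes a little-endian
              target).  Consider an 8-byte aligned load from address A where
              A+7 is unaddressable and A .. A+6 are addressable and fully
              defined.  The loop above then computes

                 vbits64  = 0x0000000000000000  (the bad byte reads back as
                                                 Defined; see get_vbits8)
                 pessim64 = 0xFF00000000000000  (Undefined for the bad byte)

              so if the exemption below applies, "vbits64 |= pessim64" keeps
              the real V bits for the seven good bytes while forcing the byte
              from the bad address to Undefined. */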
   1419 
   1420    /* "at least one of the addresses is invalid" */
   1421    tl_assert(pessim64 != V_BITS64_DEFINED);
   1422 
   1423    if (szB == VG_WORDSIZE && VG_IS_WORD_ALIGNED(a)
   1424        && n_addrs_bad < VG_WORDSIZE) {
   1425       /* Exemption applies.  Use the previously computed pessimising
   1426          value for vbits64 and return the combined result, but don't
   1427          flag an addressing error.  The pessimising value is Defined
   1428          for valid addresses and Undefined for invalid addresses. */
   1429       /* for assumption that doing bitwise or implements UifU */
   1430       tl_assert(V_BIT_UNDEFINED == 1 && V_BIT_DEFINED == 0);
   1431       /* (really need "UifU" here...)
   1432          vbits64 UifU= pessim64  (is pessimised by it, iow) */
   1433       vbits64 |= pessim64;
   1434       return vbits64;
   1435    }
   1436 
   1437    /* Also, it appears that gcc generates string-stepping code in
   1438       32-bit chunks on 64-bit platforms.  So, also grant an exception
   1439       for this case.  Note that the first clause of the conditional
   1440       (VG_WORDSIZE == 8) is known at compile time, so the whole clause
   1441       will get folded out in 32-bit builds. */
   1442    if (VG_WORDSIZE == 8
   1443        && VG_IS_4_ALIGNED(a) && nBits == 32 && n_addrs_bad < 4) {
   1444       tl_assert(V_BIT_UNDEFINED == 1 && V_BIT_DEFINED == 0);
   1445       /* (really need "UifU" here...)
   1446          vbits64 UifU= pessim64  (is pessimised by it, iow) */
   1447       vbits64 |= pessim64;
   1448       /* Mark the upper 32 bits as undefined, just to be on the safe
   1449          side. */
   1450       vbits64 |= (((ULong)V_BITS32_UNDEFINED) << 32);
   1451       return vbits64;
   1452    }
   1453 
   1454    /* Exemption doesn't apply.  Flag an addressing error in the normal
   1455       way. */
   1456    MC_(record_address_error)( VG_(get_running_tid)(), a, szB, False );
   1457 
   1458    return vbits64;
   1459 }
   1460 
   1461 
   1462 static
   1463 __attribute__((noinline))
   1464 void mc_STOREVn_slow ( Addr a, SizeT nBits, ULong vbytes, Bool bigendian )
   1465 {
   1466    SizeT szB = nBits / 8;
   1467    SizeT i, n_addrs_bad = 0;
   1468    UChar vbits8;
   1469    Addr  ai;
   1470    Bool  ok;
   1471 
   1472    PROF_EVENT(MCPE_STOREVN_SLOW);
   1473 
   1474    /* ------------ BEGIN semi-fast cases ------------ */
   1475    /* These deal quickly-ish with the common auxiliary primary map
   1476       cases on 64-bit platforms.  They are merely a speedup hack and can be
   1477       omitted without loss of correctness/functionality.  Note that in
   1478       both cases the "sizeof(void*) == 8" causes these cases to be
   1479       folded out by compilers on 32-bit platforms.  The logic below
   1480       is somewhat similar to some cases extensively commented in
   1481       MC_(helperc_STOREV8).
   1482    */
   1483    if (LIKELY(sizeof(void*) == 8
   1484                       && nBits == 64 && VG_IS_8_ALIGNED(a))) {
   1485       SecMap* sm       = get_secmap_for_reading(a);
   1486       UWord   sm_off16 = SM_OFF_16(a);
   1487       UWord   vabits16 = ((UShort*)(sm->vabits8))[sm_off16];
   1488       if (LIKELY( !is_distinguished_sm(sm) &&
   1489                           (VA_BITS16_DEFINED   == vabits16 ||
   1490                            VA_BITS16_UNDEFINED == vabits16) )) {
   1491          /* Handle common case quickly: a is suitably aligned, */
   1492          /* is mapped, and is addressible. */
   1493          // Convert full V-bits in register to compact 2-bit form.
   1494          if (LIKELY(V_BITS64_DEFINED == vbytes)) {
   1495             ((UShort*)(sm->vabits8))[sm_off16] = (UShort)VA_BITS16_DEFINED;
   1496             return;
   1497          } else if (V_BITS64_UNDEFINED == vbytes) {
   1498             ((UShort*)(sm->vabits8))[sm_off16] = (UShort)VA_BITS16_UNDEFINED;
   1499             return;
   1500          }
   1501          /* else fall into the slow case */
   1502       }
   1503       /* else fall into the slow case */
   1504    }
   1505    if (LIKELY(sizeof(void*) == 8
   1506                       && nBits == 32 && VG_IS_4_ALIGNED(a))) {
   1507       SecMap* sm      = get_secmap_for_reading(a);
   1508       UWord   sm_off  = SM_OFF(a);
   1509       UWord   vabits8 = sm->vabits8[sm_off];
   1510       if (LIKELY( !is_distinguished_sm(sm) &&
   1511                           (VA_BITS8_DEFINED   == vabits8 ||
   1512                            VA_BITS8_UNDEFINED == vabits8) )) {
   1513          /* Handle common case quickly: a is suitably aligned, */
   1514          /* is mapped, and is addressible. */
   1515          // Convert full V-bits in register to compact 2-bit form.
   1516          if (LIKELY(V_BITS32_DEFINED == (vbytes & 0xFFFFFFFF))) {
   1517             sm->vabits8[sm_off] = VA_BITS8_DEFINED;
   1518             return;
   1519          } else if (V_BITS32_UNDEFINED == (vbytes & 0xFFFFFFFF)) {
   1520             sm->vabits8[sm_off] = VA_BITS8_UNDEFINED;
   1521             return;
   1522          }
   1523          /* else fall into the slow case */
   1524       }
   1525       /* else fall into the slow case */
   1526    }
   1527    /* ------------ END semi-fast cases ------------ */
   1528 
   1529    tl_assert(nBits == 64 || nBits == 32 || nBits == 16 || nBits == 8);
   1530 
   1531    /* Dump vbytes in memory, iterating from least to most significant
   1532       byte.  At the same time establish addressibility of the location. */
   1533    for (i = 0; i < szB; i++) {
   1534       PROF_EVENT(MCPE_STOREVN_SLOW_LOOP);
   1535       ai     = a + byte_offset_w(szB, bigendian, i);
   1536       vbits8 = vbytes & 0xff;
   1537       ok     = set_vbits8(ai, vbits8);
   1538       if (!ok) n_addrs_bad++;
   1539       vbytes >>= 8;
   1540    }
   1541 
   1542    /* If an address error has happened, report it. */
   1543    if (n_addrs_bad > 0)
   1544       MC_(record_address_error)( VG_(get_running_tid)(), a, szB, True );
   1545 }
   1546 
   1547 
   1548 /*------------------------------------------------------------*/
   1549 /*--- Setting permissions over address ranges.             ---*/
   1550 /*------------------------------------------------------------*/
   1551 
   1552 static void set_address_range_perms ( Addr a, SizeT lenT, UWord vabits16,
   1553                                       UWord dsm_num )
   1554 {
   1555    UWord    sm_off, sm_off16;
   1556    UWord    vabits2 = vabits16 & 0x3;
   1557    SizeT    lenA, lenB, len_to_next_secmap;
   1558    Addr     aNext;
   1559    SecMap*  sm;
   1560    SecMap** sm_ptr;
   1561    SecMap*  example_dsm;
   1562 
   1563    PROF_EVENT(MCPE_SET_ADDRESS_RANGE_PERMS);
   1564 
   1565    /* Check the V+A bits make sense. */
   1566    tl_assert(VA_BITS16_NOACCESS  == vabits16 ||
   1567              VA_BITS16_UNDEFINED == vabits16 ||
   1568              VA_BITS16_DEFINED   == vabits16);
   1569 
   1570    // This code should never write PDBs;  ensure this.  (See comment above
   1571    // set_vabits2().)
   1572    tl_assert(VA_BITS2_PARTDEFINED != vabits2);
   1573 
   1574    if (lenT == 0)
   1575       return;
   1576 
   1577    if (lenT > 256 * 1024 * 1024) {
   1578       if (VG_(clo_verbosity) > 0 && !VG_(clo_xml)) {
   1579          const HChar* s = "unknown???";
   1580          if (vabits16 == VA_BITS16_NOACCESS ) s = "noaccess";
   1581          if (vabits16 == VA_BITS16_UNDEFINED) s = "undefined";
   1582          if (vabits16 == VA_BITS16_DEFINED  ) s = "defined";
   1583          VG_(message)(Vg_UserMsg, "Warning: set address range perms: "
   1584                                   "large range [0x%lx, 0x%lx) (%s)\n",
   1585                                   a, a + lenT, s);
   1586       }
   1587    }
   1588 
   1589 #ifndef PERF_FAST_SARP
   1590    /*------------------ debug-only case ------------------ */
   1591    {
   1592       // Endianness doesn't matter here because all bytes are being set to
   1593       // the same value.
   1594       // Nb: We don't have to worry about updating the sec-V-bits table
   1595       // after these set_vabits2() calls because this code never writes
   1596       // VA_BITS2_PARTDEFINED values.
   1597       SizeT i;
   1598       for (i = 0; i < lenT; i++) {
   1599          set_vabits2(a + i, vabits2);
   1600       }
   1601       return;
   1602    }
   1603 #endif
   1604 
   1605    /*------------------ standard handling ------------------ */
   1606 
   1607    /* Get the distinguished secondary that we might want
   1608       to use (part of the space-compression scheme). */
   1609    example_dsm = &sm_distinguished[dsm_num];
   1610 
   1611    // We have to handle ranges covering various combinations of partial and
   1612    // whole sec-maps.  Here is how parts 1, 2 and 3 are used in each case.
   1613    // Cases marked with a '*' are common.
   1614    //
   1615    //   TYPE                                             PARTS USED
   1616    //   ----                                             ----------
   1617    // * one partial sec-map                  (p)         1
   1618    // - one whole sec-map                    (P)         2
   1619    //
   1620    // * two partial sec-maps                 (pp)        1,3
   1621    // - one partial, one whole sec-map       (pP)        1,2
   1622    // - one whole, one partial sec-map       (Pp)        2,3
   1623    // - two whole sec-maps                   (PP)        2,2
   1624    //
   1625    // * one partial, one whole, one partial  (pPp)       1,2,3
   1626    // - one partial, two whole               (pPP)       1,2,2
   1627    // - two whole, one partial               (PPp)       2,2,3
   1628    // - three whole                          (PPP)       2,2,2
   1629    //
   1630    // * one partial, N-2 whole, one partial  (pP...Pp)   1,2...2,3
   1631    // - one partial, N-1 whole               (pP...PP)   1,2...2,2
   1632    // - N-1 whole, one partial               (PP...Pp)   2,2...2,3
   1633    // - N whole                              (PP...PP)   2,2...2,2
   1634 
   1635    // Break up total length (lenT) into two parts:  length in the first
   1636    // sec-map (lenA), and the rest (lenB);   lenT == lenA + lenB.
   1637    aNext = start_of_this_sm(a) + SM_SIZE;
   1638    len_to_next_secmap = aNext - a;
   1639    if ( lenT <= len_to_next_secmap ) {
   1640       // Range entirely within one sec-map.  Covers almost all cases.
   1641       PROF_EVENT(MCPE_SET_ADDRESS_RANGE_PERMS_SINGLE_SECMAP);
   1642       lenA = lenT;
   1643       lenB = 0;
   1644    } else if (is_start_of_sm(a)) {
   1645       // Range spans at least one whole sec-map, and starts at the beginning
   1646       // of a sec-map; skip to Part 2.
   1647       PROF_EVENT(MCPE_SET_ADDRESS_RANGE_PERMS_STARTOF_SECMAP);
   1648       lenA = 0;
   1649       lenB = lenT;
   1650       goto part2;
   1651    } else {
   1652       // Range spans two or more sec-maps, first one is partial.
   1653       PROF_EVENT(MCPE_SET_ADDRESS_RANGE_PERMS_MULTIPLE_SECMAPS);
   1654       lenA = len_to_next_secmap;
   1655       lenB = lenT - lenA;
   1656    }
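
           // Worked example (illustrative, assuming the usual 64KB SM_SIZE):
           // with a = 0x5000F000 and lenT = 0x23000, the code above gives
           // aNext = 0x50010000, len_to_next_secmap = 0x1000, and hence
           // lenA = 0x1000, lenB = 0x22000.  Part 1 below covers the partial
           // first sec-map, Part 2 covers two whole sec-maps (0x20000 bytes),
           // and Part 3 covers the final 0x2000 bytes: the pP...Pp case
           // (parts 1,2,2,3).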
   1657 
   1658    //------------------------------------------------------------------------
   1659    // Part 1: Deal with the first sec_map.  Most of the time the range will be
   1660    // entirely within a sec_map and this part alone will suffice.  Also,
   1661    // doing it this way lets us avoid repeatedly testing for the crossing of
   1662    // a sec-map boundary within these loops.
   1663    //------------------------------------------------------------------------
   1664 
   1665    // If it's distinguished, make it undistinguished if necessary.
   1666    sm_ptr = get_secmap_ptr(a);
   1667    if (is_distinguished_sm(*sm_ptr)) {
   1668       if (*sm_ptr == example_dsm) {
   1669          // Sec-map already has the V+A bits that we want, so skip.
   1670          PROF_EVENT(MCPE_SET_ADDRESS_RANGE_PERMS_DIST_SM1_QUICK);
   1671          a    = aNext;
   1672          lenA = 0;
   1673       } else {
   1674          PROF_EVENT(MCPE_SET_ADDRESS_RANGE_PERMS_DIST_SM1);
   1675          *sm_ptr = copy_for_writing(*sm_ptr);
   1676       }
   1677    }
   1678    sm = *sm_ptr;
   1679 
   1680    // 1 byte steps
   1681    while (True) {
   1682       if (VG_IS_8_ALIGNED(a)) break;
   1683       if (lenA < 1)           break;
   1684       PROF_EVENT(MCPE_SET_ADDRESS_RANGE_PERMS_LOOP1A);
   1685       sm_off = SM_OFF(a);
   1686       insert_vabits2_into_vabits8( a, vabits2, &(sm->vabits8[sm_off]) );
   1687       a    += 1;
   1688       lenA -= 1;
   1689    }
   1690    // 8-aligned, 8 byte steps
   1691    while (True) {
   1692       if (lenA < 8) break;
   1693       PROF_EVENT(MCPE_SET_ADDRESS_RANGE_PERMS_LOOP8A);
   1694       sm_off16 = SM_OFF_16(a);
   1695       ((UShort*)(sm->vabits8))[sm_off16] = vabits16;
   1696       a    += 8;
   1697       lenA -= 8;
   1698    }
   1699    // 1 byte steps
   1700    while (True) {
   1701       if (lenA < 1) break;
   1702       PROF_EVENT(MCPE_SET_ADDRESS_RANGE_PERMS_LOOP1B);
   1703       sm_off = SM_OFF(a);
   1704       insert_vabits2_into_vabits8( a, vabits2, &(sm->vabits8[sm_off]) );
   1705       a    += 1;
   1706       lenA -= 1;
   1707    }
   1708 
   1709    // We've finished the first sec-map.  Is that it?
   1710    if (lenB == 0)
   1711       return;
   1712 
   1713    //------------------------------------------------------------------------
   1714    // Part 2: Fast-set entire sec-maps at a time.
   1715    //------------------------------------------------------------------------
   1716   part2:
   1717    // 64KB-aligned, 64KB steps.
   1718    // Nb: we can reach here with lenB < SM_SIZE
   1719    tl_assert(0 == lenA);
   1720    while (True) {
   1721       if (lenB < SM_SIZE) break;
   1722       tl_assert(is_start_of_sm(a));
   1723       PROF_EVENT(MCPE_SET_ADDRESS_RANGE_PERMS_LOOP64K);
   1724       sm_ptr = get_secmap_ptr(a);
   1725       if (!is_distinguished_sm(*sm_ptr)) {
   1726          PROF_EVENT(MCPE_SET_ADDRESS_RANGE_PERMS_LOOP64K_FREE_DIST_SM);
   1727          // Free the non-distinguished sec-map that we're replacing.  This
   1728          // case happens moderately often, enough to be worthwhile.
   1729          SysRes sres = VG_(am_munmap_valgrind)((Addr)*sm_ptr, sizeof(SecMap));
   1730          tl_assert2(! sr_isError(sres), "SecMap valgrind munmap failure\n");
   1731       }
   1732       update_SM_counts(*sm_ptr, example_dsm);
   1733       // Make the sec-map entry point to the example DSM
   1734       *sm_ptr = example_dsm;
   1735       lenB -= SM_SIZE;
   1736       a    += SM_SIZE;
   1737    }
   1738 
   1739    // We've finished all the whole sec-maps.  Is that it?
   1740    if (lenB == 0)
   1741       return;
   1742 
   1743    //------------------------------------------------------------------------
   1744    // Part 3: Finish off the final partial sec-map, if necessary.
   1745    //------------------------------------------------------------------------
   1746 
   1747    tl_assert(is_start_of_sm(a) && lenB < SM_SIZE);
   1748 
   1749    // If it's distinguished, make it undistinguished if necessary.
   1750    sm_ptr = get_secmap_ptr(a);
   1751    if (is_distinguished_sm(*sm_ptr)) {
   1752       if (*sm_ptr == example_dsm) {
   1753          // Sec-map already has the V+A bits that we want, so stop.
   1754          PROF_EVENT(MCPE_SET_ADDRESS_RANGE_PERMS_DIST_SM2_QUICK);
   1755          return;
   1756       } else {
   1757          PROF_EVENT(MCPE_SET_ADDRESS_RANGE_PERMS_DIST_SM2);
   1758          *sm_ptr = copy_for_writing(*sm_ptr);
   1759       }
   1760    }
   1761    sm = *sm_ptr;
   1762 
   1763    // 8-aligned, 8 byte steps
   1764    while (True) {
   1765       if (lenB < 8) break;
   1766       PROF_EVENT(MCPE_SET_ADDRESS_RANGE_PERMS_LOOP8B);
   1767       sm_off16 = SM_OFF_16(a);
   1768       ((UShort*)(sm->vabits8))[sm_off16] = vabits16;
   1769       a    += 8;
   1770       lenB -= 8;
   1771    }
   1772    // 1 byte steps
   1773    while (True) {
   1774       if (lenB < 1) return;
   1775       PROF_EVENT(MCPE_SET_ADDRESS_RANGE_PERMS_LOOP1C);
   1776       sm_off = SM_OFF(a);
   1777       insert_vabits2_into_vabits8( a, vabits2, &(sm->vabits8[sm_off]) );
   1778       a    += 1;
   1779       lenB -= 1;
   1780    }
   1781 }
   1782 
   1783 
   1784 /* --- Set permissions for arbitrary address ranges --- */
   1785 
   1786 void MC_(make_mem_noaccess) ( Addr a, SizeT len )
   1787 {
   1788    PROF_EVENT(MCPE_MAKE_MEM_NOACCESS);
   1789    DEBUG("MC_(make_mem_noaccess)(%p, %lu)\n", a, len);
   1790    set_address_range_perms ( a, len, VA_BITS16_NOACCESS, SM_DIST_NOACCESS );
   1791    if (UNLIKELY( MC_(clo_mc_level) == 3 ))
   1792       ocache_sarp_Clear_Origins ( a, len );
   1793 }
   1794 
   1795 static void make_mem_undefined ( Addr a, SizeT len )
   1796 {
   1797    PROF_EVENT(MCPE_MAKE_MEM_UNDEFINED);
   1798    DEBUG("make_mem_undefined(%p, %lu)\n", a, len);
   1799    set_address_range_perms ( a, len, VA_BITS16_UNDEFINED, SM_DIST_UNDEFINED );
   1800 }
   1801 
   1802 void MC_(make_mem_undefined_w_otag) ( Addr a, SizeT len, UInt otag )
   1803 {
   1804    PROF_EVENT(MCPE_MAKE_MEM_UNDEFINED_W_OTAG);
   1805    DEBUG("MC_(make_mem_undefined_w_otag)(%p, %lu)\n", a, len);
   1806    set_address_range_perms ( a, len, VA_BITS16_UNDEFINED, SM_DIST_UNDEFINED );
   1807    if (UNLIKELY( MC_(clo_mc_level) == 3 ))
   1808       ocache_sarp_Set_Origins ( a, len, otag );
   1809 }
   1810 
   1811 static
   1812 void make_mem_undefined_w_tid_and_okind ( Addr a, SizeT len,
   1813                                           ThreadId tid, UInt okind )
   1814 {
   1815    UInt        ecu;
   1816    ExeContext* here;
   1817    /* VG_(record_ExeContext) checks for validity of tid, and asserts
   1818       if it is invalid.  So no need to do it here. */
   1819    tl_assert(okind <= 3);
   1820    here = VG_(record_ExeContext)( tid, 0/*first_ip_delta*/ );
   1821    tl_assert(here);
   1822    ecu = VG_(get_ECU_from_ExeContext)(here);
   1823    tl_assert(VG_(is_plausible_ECU)(ecu));
   1824    MC_(make_mem_undefined_w_otag) ( a, len, ecu | okind );
   1825 }
   1826 
   1827 static
   1828 void mc_new_mem_w_tid_make_ECU  ( Addr a, SizeT len, ThreadId tid )
   1829 {
   1830    make_mem_undefined_w_tid_and_okind ( a, len, tid, MC_OKIND_UNKNOWN );
   1831 }
   1832 
   1833 static
   1834 void mc_new_mem_w_tid_no_ECU  ( Addr a, SizeT len, ThreadId tid )
   1835 {
   1836    MC_(make_mem_undefined_w_otag) ( a, len, MC_OKIND_UNKNOWN );
   1837 }
   1838 
   1839 void MC_(make_mem_defined) ( Addr a, SizeT len )
   1840 {
   1841    PROF_EVENT(MCPE_MAKE_MEM_DEFINED);
   1842    DEBUG("MC_(make_mem_defined)(%p, %lu)\n", a, len);
   1843    set_address_range_perms ( a, len, VA_BITS16_DEFINED, SM_DIST_DEFINED );
   1844    if (UNLIKELY( MC_(clo_mc_level) == 3 ))
   1845       ocache_sarp_Clear_Origins ( a, len );
   1846 }
   1847 
   1848 __attribute__((unused))
   1849 static void make_mem_defined_w_tid ( Addr a, SizeT len, ThreadId tid )
   1850 {
   1851    MC_(make_mem_defined)(a, len);
   1852 }
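
        /* Rough usage sketch (illustrative only): the VALGRIND_MAKE_MEM_*
           client requests declared in memcheck.h are serviced by the
           routines above, approximately as follows:

              VALGRIND_MAKE_MEM_NOACCESS(p, n)  -> MC_(make_mem_noaccess)(p, n)
              VALGRIND_MAKE_MEM_UNDEFINED(p, n) -> MC_(make_mem_undefined_w_otag)
                                                   (p, n, otag with MC_OKIND_USER)
              VALGRIND_MAKE_MEM_DEFINED(p, n)   -> MC_(make_mem_defined)(p, n)

           where the otag only matters when origin tracking
           (--track-origins=yes) is enabled. */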
   1853 
   1854 /* For each byte in [a,a+len), if the byte is addressable, make it be
   1855    defined, but if it isn't addressible, leave it alone.  In other
   1856    words a version of MC_(make_mem_defined) that doesn't mess with
   1857    addressibility.  Low-performance implementation. */
   1858 static void make_mem_defined_if_addressable ( Addr a, SizeT len )
   1859 {
   1860    SizeT i;
   1861    UChar vabits2;
   1862    DEBUG("make_mem_defined_if_addressable(%p, %llu)\n", a, (ULong)len);
   1863    for (i = 0; i < len; i++) {
   1864       vabits2 = get_vabits2( a+i );
   1865       if (LIKELY(VA_BITS2_NOACCESS != vabits2)) {
   1866          set_vabits2(a+i, VA_BITS2_DEFINED);
   1867          if (UNLIKELY(MC_(clo_mc_level) >= 3)) {
   1868             MC_(helperc_b_store1)( a+i, 0 ); /* clear the origin tag */
   1869          }
   1870       }
   1871    }
   1872 }
   1873 
   1874 /* Similarly (needed for mprotect handling ..) */
   1875 static void make_mem_defined_if_noaccess ( Addr a, SizeT len )
   1876 {
   1877    SizeT i;
   1878    UChar vabits2;
   1879    DEBUG("make_mem_defined_if_noaccess(%p, %llu)\n", a, (ULong)len);
   1880    for (i = 0; i < len; i++) {
   1881       vabits2 = get_vabits2( a+i );
   1882       if (LIKELY(VA_BITS2_NOACCESS == vabits2)) {
   1883          set_vabits2(a+i, VA_BITS2_DEFINED);
   1884          if (UNLIKELY(MC_(clo_mc_level) >= 3)) {
   1885             MC_(helperc_b_store1)( a+i, 0 ); /* clear the origin tag */
   1886          }
   1887       }
   1888    }
   1889 }
   1890 
   1891 /* --- Block-copy permissions (needed for implementing realloc() and
   1892        sys_mremap). --- */
   1893 
   1894 void MC_(copy_address_range_state) ( Addr src, Addr dst, SizeT len )
   1895 {
   1896    SizeT i, j;
   1897    UChar vabits2, vabits8;
   1898    Bool  aligned, nooverlap;
   1899 
   1900    DEBUG("MC_(copy_address_range_state)\n");
   1901    PROF_EVENT(MCPE_COPY_ADDRESS_RANGE_STATE);
   1902 
   1903    if (len == 0 || src == dst)
   1904       return;
   1905 
   1906    aligned   = VG_IS_4_ALIGNED(src) && VG_IS_4_ALIGNED(dst);
   1907    nooverlap = src+len <= dst || dst+len <= src;
   1908 
   1909    if (nooverlap && aligned) {
   1910 
   1911       /* Vectorised fast case, when no overlap and suitably aligned */
   1912       /* vector loop */
   1913       i = 0;
   1914       while (len >= 4) {
   1915          vabits8 = get_vabits8_for_aligned_word32( src+i );
   1916          set_vabits8_for_aligned_word32( dst+i, vabits8 );
   1917          if (LIKELY(VA_BITS8_DEFINED == vabits8
   1918                             || VA_BITS8_UNDEFINED == vabits8
   1919                             || VA_BITS8_NOACCESS == vabits8)) {
   1920             /* do nothing */
   1921          } else {
   1922             /* have to copy secondary map info */
   1923             if (VA_BITS2_PARTDEFINED == get_vabits2( src+i+0 ))
   1924                set_sec_vbits8( dst+i+0, get_sec_vbits8( src+i+0 ) );
   1925             if (VA_BITS2_PARTDEFINED == get_vabits2( src+i+1 ))
   1926                set_sec_vbits8( dst+i+1, get_sec_vbits8( src+i+1 ) );
   1927             if (VA_BITS2_PARTDEFINED == get_vabits2( src+i+2 ))
   1928                set_sec_vbits8( dst+i+2, get_sec_vbits8( src+i+2 ) );
   1929             if (VA_BITS2_PARTDEFINED == get_vabits2( src+i+3 ))
   1930                set_sec_vbits8( dst+i+3, get_sec_vbits8( src+i+3 ) );
   1931          }
   1932          i += 4;
   1933          len -= 4;
   1934       }
   1935       /* fixup loop */
   1936       while (len >= 1) {
   1937          vabits2 = get_vabits2( src+i );
   1938          set_vabits2( dst+i, vabits2 );
   1939          if (VA_BITS2_PARTDEFINED == vabits2) {
   1940             set_sec_vbits8( dst+i, get_sec_vbits8( src+i ) );
   1941          }
   1942          i++;
   1943          len--;
   1944       }
   1945 
   1946    } else {
   1947 
   1948       /* We have to do things the slow way */
   1949       if (src < dst) {
   1950          for (i = 0, j = len-1; i < len; i++, j--) {
   1951             PROF_EVENT(MCPE_COPY_ADDRESS_RANGE_STATE_LOOP1);
   1952             vabits2 = get_vabits2( src+j );
   1953             set_vabits2( dst+j, vabits2 );
   1954             if (VA_BITS2_PARTDEFINED == vabits2) {
   1955                set_sec_vbits8( dst+j, get_sec_vbits8( src+j ) );
   1956             }
   1957          }
   1958       }
   1959 
   1960       if (src > dst) {
   1961          for (i = 0; i < len; i++) {
   1962             PROF_EVENT(MCPE_COPY_ADDRESS_RANGE_STATE_LOOP2);
   1963             vabits2 = get_vabits2( src+i );
   1964             set_vabits2( dst+i, vabits2 );
   1965             if (VA_BITS2_PARTDEFINED == vabits2) {
   1966                set_sec_vbits8( dst+i, get_sec_vbits8( src+i ) );
   1967             }
   1968          }
   1969       }
   1970    }
   1971 
   1972 }
   1973 
   1974 
   1975 /*------------------------------------------------------------*/
   1976 /*--- Origin tracking stuff - cache basics                 ---*/
   1977 /*------------------------------------------------------------*/
   1978 
   1979 /* AN OVERVIEW OF THE ORIGIN TRACKING IMPLEMENTATION
   1980    ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
   1981 
   1982    Note that this implementation draws inspiration from the "origin
   1983    tracking by value piggybacking" scheme described in "Tracking Bad
   1984    Apples: Reporting the Origin of Null and Undefined Value Errors"
   1985    (Michael Bond, Nicholas Nethercote, Stephen Kent, Samuel Guyer,
   1986    Kathryn McKinley, OOPSLA07, Montreal, Oct 2007) but in fact it is
   1987    implemented completely differently.
   1988 
   1989    Origin tags and ECUs -- about the shadow values
   1990    ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
   1991 
   1992    This implementation tracks the defining point of all uninitialised
   1993    values using so called "origin tags", which are 32-bit integers,
   1994    rather than using the values themselves to encode the origins.  The
   1995    latter, so-called "value piggybacking", is what the OOPSLA07 paper
   1996    describes.
   1997 
   1998    Origin tags, as tracked by the machinery below, are 32-bit unsigned
   1999    ints (UInts), regardless of the machine's word size.  Each tag
   2000    comprises an upper 30-bit ECU field and a lower 2-bit
   2001    'kind' field.  The ECU field is a number given out by m_execontext
   2002    and has a 1-1 mapping with ExeContext*s.  An ECU can be used
   2003    directly as an origin tag (otag), but in fact we want to put
   2004    additional information in the 'kind' field to indicate roughly where the
   2005    tag came from.  This helps print more understandable error messages
   2006    for the user -- it has no other purpose.  In summary:
   2007 
   2008    * Both ECUs and origin tags are represented as 32-bit words
   2009 
   2010    * m_execontext and the core-tool interface deal purely in ECUs.
   2011      They have no knowledge of origin tags - that is a purely
   2012      Memcheck-internal matter.
   2013 
   2014    * all valid ECUs have the lowest 2 bits zero and at least
   2015      one of the upper 30 bits nonzero (see VG_(is_plausible_ECU))
   2016 
   2017    * to convert from an ECU to an otag, OR in one of the MC_OKIND_
   2018      constants defined in mc_include.h.
   2019 
   2020    * to convert an otag back to an ECU, AND it with ~3 (see sketch below)
   2021 
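           A minimal sketch of the two conversions (hypothetical helper
           names, shown for illustration only):

              static inline UInt ecu_to_otag ( UInt ecu, UInt okind ) {
                 return ecu | okind;   // okind is one of the MC_OKIND_ values
              }
              static inline UInt otag_to_ecu ( UInt otag ) {
                 return otag & ~3u;    // clear the 2-bit 'kind' field
              }
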
   2022    One important fact is that no valid otag is zero.  A zero otag is
   2023    used by the implementation to indicate "no origin", which could
   2024    mean that either the value is defined, or it is undefined but the
   2025    implementation somehow managed to lose the origin.
   2026 
   2027    The ECU used for memory created by malloc etc is derived from the
   2028    stack trace at the time the malloc etc happens.  This means the
   2029    mechanism can show the exact allocation point for heap-created
   2030    uninitialised values.
   2031 
   2032    In contrast, it is simply too expensive to create a complete
   2033    backtrace for each stack allocation.  Therefore we merely use a
   2034    depth-1 backtrace for stack allocations, which can be done once at
   2035    translation time, rather than N times at run time.  The result of
   2036    this is that, for stack created uninitialised values, Memcheck can
   2037    only show the allocating function, and not what called it.
   2038    Furthermore, compilers tend to move the stack pointer just once at
   2039    the start of the function, to allocate all locals, and so in fact
   2040    the stack origin almost always simply points to the opening brace
   2041    of the function.  Net result is, for stack origins, the mechanism
   2042    can tell you in which function the undefined value was created, but
   2043    that's all.  Users will need to carefully check all locals in the
   2044    specified function.
   2045 
   2046    Shadowing registers and memory
   2047    ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
   2048 
   2049    Memory is shadowed using a two level cache structure (ocacheL1 and
   2050    ocacheL2).  Memory references are first directed to ocacheL1.  This
   2051    is a traditional 2-way set associative cache with 32-byte lines and
   2052    approximate LRU replacement within each set.
   2053 
   2054    A naive implementation would require storing one 32 bit otag for
   2055    each byte of memory covered, a 4:1 space overhead.  Instead, there
   2056    is one otag for every 4 bytes of memory covered, plus a 4-bit mask
   2057    that shows which of the 4 bytes have that shadow value and which
   2058    have a shadow value of zero (indicating no origin).  Hence a lot of
   2059    space is saved, but the cost is that only one different origin per
   2060    4 bytes of address space can be represented.  This is a source of
   2061    imprecision, but how much of a problem it really is remains to be
   2062    seen.
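
           For illustration (hypothetical values, and assuming bit k of the
           mask corresponds to byte A+k): suppose the four bytes at A .. A+3
           share one slot, and only A+1 and A+2 currently hold values whose
           (common) origin is 0x1A04.  Then, roughly,

              w32  = 0x00001A04
              mask = 0b0110    (set bits mark bytes carrying w32's otag)

           and a query on A or A+3 yields otag zero, meaning "no origin".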
   2063 
   2064    A cache line that contains all zeroes ("no origins") contains no
   2065    useful information, and can be ejected from the L1 cache "for
   2066    free", in the sense that a read miss on the L1 causes a line of
   2067    zeroes to be installed.  However, ejecting a line containing
   2068    nonzeroes risks losing origin information permanently.  In order to
   2069    prevent such lossage, ejected nonzero lines are placed in a
   2070    secondary cache (ocacheL2), which is an OSet (AVL tree) of cache
   2071    lines.  This can grow arbitrarily large, and so should ensure that
   2072    Memcheck runs out of memory in preference to losing useful origin
   2073    info due to cache size limitations.
   2074 
   2075    Shadowing registers is a bit tricky, because the shadow values are
   2076    32 bits, regardless of the size of the register.  That gives a
   2077    problem for registers smaller than 32 bits.  The solution is to
   2078    find spaces in the guest state that are unused, and use those to
   2079    shadow guest state fragments smaller than 32 bits.  For example, on
   2080    ppc32/64, each vector register is 16 bytes long.  If 4 bytes of the
   2081    shadow are allocated for the register's otag, then there are still
   2082    12 bytes left over which could be used to shadow 3 other values.
   2083 
   2084    This implies there is some non-obvious mapping from guest state
   2085    (start,length) pairs to the relevant shadow offset (for the origin
   2086    tags).  And it is unfortunately guest-architecture specific.  The
   2087    mapping is contained in mc_machine.c, which is quite lengthy but
   2088    straightforward.
   2089 
   2090    Instrumenting the IR
   2091    ~~~~~~~~~~~~~~~~~~~~
   2092 
   2093    Instrumentation is largely straightforward, and done by the
   2094    functions schemeE and schemeS in mc_translate.c.  These generate
   2095    code for handling the origin tags of expressions (E) and statements
   2096    (S) respectively.  The rather strange names are a reference to the
   2097    "compilation schemes" shown in Simon Peyton Jones' book "The
   2098    Implementation of Functional Programming Languages" (Prentice Hall,
   2099    1987, see
   2100    http://research.microsoft.com/~simonpj/papers/slpj-book-1987/index.htm).
   2101 
   2102    schemeS merely arranges to move shadow values around the guest
   2103    state to track the incoming IR.  schemeE is largely trivial too.
   2104    The only significant point is how to compute the otag corresponding
   2105    to binary (or ternary, quaternary, etc) operator applications.  The
   2106    rule is simple: just take whichever value is larger (32-bit
   2107    unsigned max).  Constants get the special value zero.  Hence this
   2108    rule always propagates a nonzero (known) otag in preference to a
   2109    zero (unknown, or more likely, value-is-defined) tag, as we want.
   2110    If two different undefined values are inputs to a binary operator
   2111    application, then which is propagated is arbitrary, but that
   2112    doesn't matter, since the program is erroneous in using either of
   2113    the values, and so there's no point in attempting to propagate
   2114    both.
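
           For example, if x is a constant (otag 0) and y is an undefined
           value whose otag is 0x6EF8, then

              otag(Add32(x,y)) = Max32U(0, 0x6EF8) = 0x6EF8

           and the known origin survives the operation.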
   2115 
   2116    Since constants are abstracted to (otag) zero, much of the
   2117    instrumentation code can be folded out without difficulty by the
   2118    generic post-instrumentation IR cleanup pass, using these rules:
   2119    Max32U(0,x) -> x, Max32U(x,0) -> x, and Max32U(x,y) where x and y
   2120    are constants is evaluated at JIT time, after which the resulting
   2121    dead code is removed.  In practice this causes surprisingly few
   2122    Max32Us to survive through to backend code generation.
   2123 
   2124    Integration with the V-bits machinery
   2125    ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
   2126 
   2127    This is again largely straightforward.  Mostly the otag and V bits
   2128    stuff are independent.  The only point of interaction is when the V
   2129    bits instrumenter creates a call to a helper function to report an
   2130    uninitialised value error -- in that case it must first use schemeE
   2131    to get hold of the origin tag expression for the value, and pass
   2132    that to the helper too.
   2133 
   2134    There is the usual stuff to do with setting address range
   2135    permissions.  When memory is painted undefined, we must also know
   2136    the origin tag to paint with, which involves some tedious plumbing,
   2137    particularly to do with the fast case stack handlers.  When memory
   2138    is painted defined or noaccess then the origin tags must be forced
   2139    to zero.
   2140 
   2141    One of the goals of the implementation was to ensure that the
   2142    non-origin tracking mode isn't slowed down at all.  To do this,
   2143    various functions to do with memory permissions setting (again,
   2144    mostly pertaining to the stack) are duplicated for the with- and
   2145    without-otag case.
   2146 
   2147    Dealing with stack redzones, and the NIA cache
   2148    ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
   2149 
   2150    This is one of the few non-obvious parts of the implementation.
   2151 
   2152    Some ABIs (amd64-ELF, ppc64-ELF, ppc32/64-XCOFF) define a small
   2153    reserved area below the stack pointer, that can be used as scratch
   2154    space by compiler generated code for functions.  In the Memcheck
   2155    sources this is referred to as the "stack redzone".  The important
   2156    thing here is that such redzones are considered volatile across
   2157    function calls and returns.  So Memcheck takes care to mark them as
   2158    undefined for each call and return, on the afflicted platforms.
   2159    Past experience shows this is essential in order to get reliable
   2160    messages about uninitialised values that come from the stack.
   2161 
   2162    So the question is, when we paint a redzone undefined, what origin
   2163    tag should we use for it?  Consider a function f() calling g().  If
   2164    we paint the redzone using an otag derived from the ExeContext of
   2165    the CALL/BL instruction in f, then any errors in g causing it to
   2166    use uninitialised values that happen to lie in the redzone, will be
   2167    reported as having their origin in f.  Which is highly confusing.
   2168 
   2169    The same applies for returns: if, on a return, we paint the redzone
   2170    using an origin tag derived from the ExeContext of the RET/BLR
   2171    instruction in g, then any later errors in f causing it to use
   2172    uninitialised values in the redzone, will be reported as having
   2173    their origin in g.  Which is just as confusing.
   2174 
   2175    To do it right, in both cases we need to use an origin tag which
   2176    pertains to the instruction which dynamically follows the CALL/BL
   2177    or RET/BLR.  In short, one derived from the NIA - the "next
   2178    instruction address".
   2179 
   2180    To make this work, Memcheck's redzone-painting helper,
   2181    MC_(helperc_MAKE_STACK_UNINIT), now takes a third argument, the
   2182    NIA.  It converts the NIA to a 1-element ExeContext, and uses that
   2183    ExeContext's ECU as the basis for the otag used to paint the
   2184    redzone.  The expensive part of this is converting an NIA into an
   2185    ECU, since this happens once for every call and every return.  So
   2186    we use a simple 511-line, 2-way set associative cache
   2187    (nia_to_ecu_cache) to cache the mappings, and that knocks most of
   2188    the cost out.
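
           Conceptually (a sketch only; the real cache and its helper are
           defined further down in this file):

              ecu = look up nia in nia_to_ecu_cache;
              if (miss) {
                 ecu = the ECU of a depth-1 ExeContext made from nia;
                 enter (nia, ecu) into the cache;
              }
              otag used for the painting = ecu | MC_OKIND_STACK;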
   2189 
   2190    Further background comments
   2191    ~~~~~~~~~~~~~~~~~~~~~~~~~~~
   2192 
   2193    > Question: why is otag a UInt?  Wouldn't a UWord be better?  Isn't
   2194    > it really just the address of the relevant ExeContext?
   2195 
   2196    Well, it's not the address, but a value which has a 1-1 mapping
   2197    with ExeContexts, and is guaranteed not to be zero, since zero
   2198    denotes (to memcheck) "unknown origin or defined value".  So these
   2199    UInts are just numbers starting at 4 and incrementing by 4; each
   2200    ExeContext is given a number when it is created.  (*** NOTE this
   2201    confuses otags and ECUs; see comments above ***).
   2202 
   2203    Making these otags 32-bit regardless of the machine's word size
   2204    makes the 64-bit implementation easier (next para).  And it doesn't
   2205    really limit us in any way, since for the tags to overflow would
   2206    require that the program somehow caused 2^30-1 different
   2207    ExeContexts to be created, in which case it is probably in deep
   2208    trouble.  Not to mention V will have soaked up many tens of
   2209    gigabytes of memory merely to store them all.
   2210 
   2211    So having 64-bit origins doesn't really buy you anything, and has
   2212    the following downsides:
   2213 
   2214    Suppose that instead, an otag is a UWord.  This would mean that, on
   2215    a 64-bit target,
   2216 
   2217    1. It becomes hard to shadow any element of guest state which is
   2218       smaller than 8 bytes.  To do so means you'd need to find some
   2219       8-byte-sized hole in the guest state which you don't want to
   2220       shadow, and use that instead to hold the otag.  On ppc64, the
   2221       condition code register(s) are split into 20 UChar sized pieces,
   2222       all of which need to be tracked (guest_XER_SO .. guest_CR7_0)
   2223       and so that would entail finding 160 bytes somewhere else in the
   2224       guest state.
   2225 
   2226       Even on x86, I want to track origins for %AH .. %DH (bits 15:8
   2227       of %EAX .. %EDX) that are separate from %AL .. %DL (bits 7:0 of
   2228       same) and so I had to look for 4 untracked otag-sized areas in
   2229       the guest state to make that possible.
   2230 
   2231       The same problem exists of course when origin tags are only 32
   2232       bits, but it's less extreme.
   2233 
   2234    2. (More compelling) it doubles the size of the origin shadow
   2235       memory.  Given that the shadow memory is organised as a fixed
   2236       size cache, and that accuracy of tracking is limited by origins
   2237       falling out the cache due to space conflicts, this isn't good.
   2238 
   2239    > Another question: is the origin tracking perfect, or are there
   2240    > cases where it fails to determine an origin?
   2241 
   2242    It is imperfect for at least the following reasons, and
   2243    probably more:
   2244 
   2245    * Insufficient capacity in the origin cache.  When a line is
   2246      evicted from the cache it is gone forever, and so subsequent
   2247      queries for the line produce zero, indicating no origin
   2248      information.  Interestingly, a line containing all zeroes can be
   2249      evicted "free" from the cache, since it contains no useful
   2250      information, so there is scope perhaps for some cleverer cache
   2251      management schemes.  (*** NOTE, with the introduction of the
   2252      second level origin tag cache, ocacheL2, this is no longer a
   2253      problem. ***)
   2254 
   2255    * The origin cache only stores one otag per 32-bits of address
   2256      space, plus 4 bits indicating which of the 4 bytes has that tag
   2257      and which are considered defined.  The result is that if two
   2258      undefined bytes in the same word are stored in memory, the first
   2259      stored byte's origin will be lost and replaced by the origin for
   2260      the second byte.
   2261 
   2262    * Nonzero origin tags for defined values.  Consider a binary
   2263      operator application op(x,y).  Suppose y is undefined (and so has
   2264      a valid nonzero origin tag), and x is defined, but erroneously
   2265      has a nonzero origin tag (defined values should have tag zero).
   2266      If the erroneous tag has a numeric value greater than y's tag,
   2267      then the rule for propagating origin tags though binary
   2268      operations, which is simply to take the unsigned max of the two
   2269      tags, will erroneously propagate x's tag rather than y's.
   2270 
   2271    * Some obscure uses of x86/amd64 byte registers can cause lossage
   2272      or confusion of origins.  %AH .. %DH are treated as different
   2273      from, and unrelated to, their parent registers, %EAX .. %EDX.
   2274      So some weird sequences like
   2275 
   2276         movb undefined-value, %AH
   2277         movb defined-value, %AL
   2278         .. use %AX or %EAX ..
   2279 
   2280      will cause the origin attributed to %AH to be ignored, since %AL,
   2281      %AX, %EAX are treated as the same register, and %AH as a
   2282      completely separate one.
   2283 
   2284    But having said all that, it actually seems to work fairly well in
   2285    practice.
   2286 */
   2287 
   2288 static UWord stats_ocacheL1_find           = 0;
   2289 static UWord stats_ocacheL1_found_at_1     = 0;
   2290 static UWord stats_ocacheL1_found_at_N     = 0;
   2291 static UWord stats_ocacheL1_misses         = 0;
   2292 static UWord stats_ocacheL1_lossage        = 0;
   2293 static UWord stats_ocacheL1_movefwds       = 0;
   2294 
   2295 static UWord stats__ocacheL2_refs          = 0;
   2296 static UWord stats__ocacheL2_misses        = 0;
   2297 static UWord stats__ocacheL2_n_nodes_max   = 0;
   2298 
   2299 /* Cache of 32-bit values, one every 32 bits of address space */
   2300 
   2301 #define OC_BITS_PER_LINE 5
   2302 #define OC_W32S_PER_LINE (1 << (OC_BITS_PER_LINE - 2))
   2303 
   2304 static INLINE UWord oc_line_offset ( Addr a ) {
   2305    return (a >> 2) & (OC_W32S_PER_LINE - 1);
   2306 }
   2307 static INLINE Bool is_valid_oc_tag ( Addr tag ) {
   2308    return 0 == (tag & ((1 << OC_BITS_PER_LINE) - 1));
   2309 }
   2310 
   2311 #define OC_LINES_PER_SET 2
   2312 
   2313 #define OC_N_SET_BITS    20
   2314 #define OC_N_SETS        (1 << OC_N_SET_BITS)
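
        /* Worked example (illustrative): with OC_BITS_PER_LINE == 5 and
           OC_N_SET_BITS == 20, an address such as a = 0x4F37A6 decomposes as
              tag     = a & ~0x1F          = 0x4F37A0
              setno   = (a >> 5) & 0xFFFFF = 0x279BD
              lineoff = (a >> 2) & 0x7     = 1   (index into w32[]/descr[])
           matching oc_line_offset() above and find_OCacheLine() below. */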
   2315 
   2316 /* These settings give:
   2317    64 bit host: ocache:  100,663,296 sizeB    67,108,864 useful
   2318    32 bit host: ocache:   92,274,688 sizeB    67,108,864 useful
   2319 */
   2320 
   2321 #define OC_MOVE_FORWARDS_EVERY_BITS 7
   2322 
   2323 
   2324 typedef
   2325    struct {
   2326       Addr  tag;
   2327       UInt  w32[OC_W32S_PER_LINE];
   2328       UChar descr[OC_W32S_PER_LINE];
   2329    }
   2330    OCacheLine;
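
        /* The ocache sizes quoted a few lines above follow from this layout:
           on a 64-bit host an OCacheLine is 8 (tag) + 8*4 (w32) + 8 (descr)
           = 48 bytes, so ocacheL1 occupies (1<<20) sets * 2 lines * 48 =
           100,663,296 bytes, of which the w32[] payload, (1<<20) * 2 * 32 =
           67,108,864 bytes, is the "useful" part.  On a 32-bit host the tag
           shrinks to 4 bytes, giving 44-byte lines and 92,274,688 bytes in
           total. */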
   2331 
   2332 /* Classify and also sanity-check 'line'.  Return 'e' (empty) if not
   2333    in use, 'n' (nonzero) if it contains at least one valid origin tag,
   2334    and 'z' if all the represented tags are zero. */
   2335 static UChar classify_OCacheLine ( OCacheLine* line )
   2336 {
   2337    UWord i;
   2338    if (line->tag == 1/*invalid*/)
   2339       return 'e'; /* EMPTY */
   2340    tl_assert(is_valid_oc_tag(line->tag));
   2341    for (i = 0; i < OC_W32S_PER_LINE; i++) {
   2342       tl_assert(0 == ((~0xF) & line->descr[i]));
   2343       if (line->w32[i] > 0 && line->descr[i] > 0)
   2344          return 'n'; /* NONZERO - contains useful info */
   2345    }
   2346    return 'z'; /* ZERO - no useful info */
   2347 }
   2348 
   2349 typedef
   2350    struct {
   2351       OCacheLine line[OC_LINES_PER_SET];
   2352    }
   2353    OCacheSet;
   2354 
   2355 typedef
   2356    struct {
   2357       OCacheSet set[OC_N_SETS];
   2358    }
   2359    OCache;
   2360 
   2361 static OCache* ocacheL1 = NULL;
   2362 static UWord   ocacheL1_event_ctr = 0;
   2363 
   2364 static void init_ocacheL2 ( void ); /* fwds */
   2365 static void init_OCache ( void )
   2366 {
   2367    UWord line, set;
   2368    tl_assert(MC_(clo_mc_level) >= 3);
   2369    tl_assert(ocacheL1 == NULL);
   2370    ocacheL1 = VG_(am_shadow_alloc)(sizeof(OCache));
   2371    if (ocacheL1 == NULL) {
   2372       VG_(out_of_memory_NORETURN)( "memcheck:allocating ocacheL1",
   2373                                    sizeof(OCache) );
   2374    }
   2375    tl_assert(ocacheL1 != NULL);
   2376    for (set = 0; set < OC_N_SETS; set++) {
   2377       for (line = 0; line < OC_LINES_PER_SET; line++) {
   2378          ocacheL1->set[set].line[line].tag = 1/*invalid*/;
   2379       }
   2380    }
   2381    init_ocacheL2();
   2382 }
   2383 
   2384 static void moveLineForwards ( OCacheSet* set, UWord lineno )
   2385 {
   2386    OCacheLine tmp;
   2387    stats_ocacheL1_movefwds++;
   2388    tl_assert(lineno > 0 && lineno < OC_LINES_PER_SET);
   2389    tmp = set->line[lineno-1];
   2390    set->line[lineno-1] = set->line[lineno];
   2391    set->line[lineno] = tmp;
   2392 }
   2393 
   2394 static void zeroise_OCacheLine ( OCacheLine* line, Addr tag ) {
   2395    UWord i;
   2396    for (i = 0; i < OC_W32S_PER_LINE; i++) {
   2397       line->w32[i] = 0; /* NO ORIGIN */
   2398       line->descr[i] = 0; /* REALLY REALLY NO ORIGIN! */
   2399    }
   2400    line->tag = tag;
   2401 }
   2402 
   2403 //////////////////////////////////////////////////////////////
   2404 //// OCache backing store
   2405 
   2406 static OSet* ocacheL2 = NULL;
   2407 
   2408 static void* ocacheL2_malloc ( const HChar* cc, SizeT szB ) {
   2409    return VG_(malloc)(cc, szB);
   2410 }
   2411 static void ocacheL2_free ( void* v ) {
   2412    VG_(free)( v );
   2413 }
   2414 
   2415 /* Stats: # nodes currently in tree */
   2416 static UWord stats__ocacheL2_n_nodes = 0;
   2417 
   2418 static void init_ocacheL2 ( void )
   2419 {
   2420    tl_assert(!ocacheL2);
   2421    tl_assert(sizeof(Word) == sizeof(Addr)); /* since OCacheLine.tag :: Addr */
   2422    tl_assert(0 == offsetof(OCacheLine,tag));
   2423    ocacheL2
   2424       = VG_(OSetGen_Create)( offsetof(OCacheLine,tag),
   2425                              NULL, /* fast cmp */
   2426                              ocacheL2_malloc, "mc.ioL2", ocacheL2_free);
   2427    stats__ocacheL2_n_nodes = 0;
   2428 }
   2429 
   2430 /* Find line with the given tag in the tree, or NULL if not found. */
   2431 static OCacheLine* ocacheL2_find_tag ( Addr tag )
   2432 {
   2433    OCacheLine* line;
   2434    tl_assert(is_valid_oc_tag(tag));
   2435    stats__ocacheL2_refs++;
   2436    line = VG_(OSetGen_Lookup)( ocacheL2, &tag );
   2437    return line;
   2438 }
   2439 
   2440 /* Delete the line with the given tag from the tree, if it is present, and
   2441    free up the associated memory. */
   2442 static void ocacheL2_del_tag ( Addr tag )
   2443 {
   2444    OCacheLine* line;
   2445    tl_assert(is_valid_oc_tag(tag));
   2446    stats__ocacheL2_refs++;
   2447    line = VG_(OSetGen_Remove)( ocacheL2, &tag );
   2448    if (line) {
   2449       VG_(OSetGen_FreeNode)(ocacheL2, line);
   2450       tl_assert(stats__ocacheL2_n_nodes > 0);
   2451       stats__ocacheL2_n_nodes--;
   2452    }
   2453 }
   2454 
   2455 /* Add a copy of the given line to the tree.  It must not already be
   2456    present. */
   2457 static void ocacheL2_add_line ( OCacheLine* line )
   2458 {
   2459    OCacheLine* copy;
   2460    tl_assert(is_valid_oc_tag(line->tag));
   2461    copy = VG_(OSetGen_AllocNode)( ocacheL2, sizeof(OCacheLine) );
   2462    *copy = *line;
   2463    stats__ocacheL2_refs++;
   2464    VG_(OSetGen_Insert)( ocacheL2, copy );
   2465    stats__ocacheL2_n_nodes++;
   2466    if (stats__ocacheL2_n_nodes > stats__ocacheL2_n_nodes_max)
   2467       stats__ocacheL2_n_nodes_max = stats__ocacheL2_n_nodes;
   2468 }
   2469 
   2470 ////
   2471 //////////////////////////////////////////////////////////////
   2472 
   2473 __attribute__((noinline))
   2474 static OCacheLine* find_OCacheLine_SLOW ( Addr a )
   2475 {
   2476    OCacheLine *victim, *inL2;
   2477    UChar c;
   2478    UWord line;
   2479    UWord setno   = (a >> OC_BITS_PER_LINE) & (OC_N_SETS - 1);
   2480    UWord tagmask = ~((1 << OC_BITS_PER_LINE) - 1);
   2481    UWord tag     = a & tagmask;
   2482    tl_assert(setno >= 0 && setno < OC_N_SETS);
   2483 
   2484    /* we already tried line == 0; skip therefore. */
   2485    for (line = 1; line < OC_LINES_PER_SET; line++) {
   2486       if (ocacheL1->set[setno].line[line].tag == tag) {
   2487          if (line == 1) {
   2488             stats_ocacheL1_found_at_1++;
   2489          } else {
   2490             stats_ocacheL1_found_at_N++;
   2491          }
   2492          if (UNLIKELY(0 == (ocacheL1_event_ctr++
   2493                             & ((1<<OC_MOVE_FORWARDS_EVERY_BITS)-1)))) {
   2494             moveLineForwards( &ocacheL1->set[setno], line );
   2495             line--;
   2496          }
   2497          return &ocacheL1->set[setno].line[line];
   2498       }
   2499    }
   2500 
   2501    /* A miss.  Use the last slot.  Implicitly this means we're
   2502       ejecting the line in the last slot. */
   2503    stats_ocacheL1_misses++;
   2504    tl_assert(line == OC_LINES_PER_SET);
   2505    line--;
   2506    tl_assert(line > 0);
   2507 
   2508    /* First, move the to-be-ejected line to the L2 cache. */
   2509    victim = &ocacheL1->set[setno].line[line];
   2510    c = classify_OCacheLine(victim);
   2511    switch (c) {
   2512       case 'e':
   2513          /* the line is empty (has invalid tag); ignore it. */
   2514          break;
   2515       case 'z':
   2516          /* line contains zeroes.  We must ensure the backing store is
   2517             updated accordingly, either by copying the line there
   2518             verbatim, or by ensuring it isn't present there.  We
   2519             choose the latter on the basis that it reduces the size of
   2520             the backing store. */
   2521          ocacheL2_del_tag( victim->tag );
   2522          break;
   2523       case 'n':
   2524          /* line contains at least one real, useful origin.  Copy it
   2525             to the backing store. */
   2526          stats_ocacheL1_lossage++;
   2527          inL2 = ocacheL2_find_tag( victim->tag );
   2528          if (inL2) {
   2529             *inL2 = *victim;
   2530          } else {
   2531             ocacheL2_add_line( victim );
   2532          }
   2533          break;
   2534       default:
   2535          tl_assert(0);
   2536    }
   2537 
   2538    /* Now we must reload the L1 cache from the backing tree, if
   2539       possible. */
   2540    tl_assert(tag != victim->tag); /* stay sane */
   2541    inL2 = ocacheL2_find_tag( tag );
   2542    if (inL2) {
   2543       /* We're in luck.  It's in the L2. */
   2544       ocacheL1->set[setno].line[line] = *inL2;
   2545    } else {
   2546       /* Missed at both levels of the cache hierarchy.  We have to
   2547          declare it as full of zeroes (unknown origins). */
   2548       stats__ocacheL2_misses++;
   2549       zeroise_OCacheLine( &ocacheL1->set[setno].line[line], tag );
   2550    }
   2551 
   2552    /* Move it forwards by one slot. */
   2553    moveLineForwards( &ocacheL1->set[setno], line );
   2554    line--;
   2555 
   2556    return &ocacheL1->set[setno].line[line];
   2557 }
   2558 
   2559 static INLINE OCacheLine* find_OCacheLine ( Addr a )
   2560 {
   2561    UWord setno   = (a >> OC_BITS_PER_LINE) & (OC_N_SETS - 1);
   2562    UWord tagmask = ~((1 << OC_BITS_PER_LINE) - 1);
   2563    UWord tag     = a & tagmask;
   2564 
   2565    stats_ocacheL1_find++;
   2566 
   2567    if (OC_ENABLE_ASSERTIONS) {
   2568       tl_assert(setno >= 0 && setno < OC_N_SETS);
   2569       tl_assert(0 == (tag & (4 * OC_W32S_PER_LINE - 1)));
   2570    }
   2571 
   2572    if (LIKELY(ocacheL1->set[setno].line[0].tag == tag)) {
   2573       return &ocacheL1->set[setno].line[0];
   2574    }
   2575 
   2576    return find_OCacheLine_SLOW( a );
   2577 }
   2578 
   2579 static INLINE void set_aligned_word64_Origin_to_undef ( Addr a, UInt otag )
   2580 {
   2581    //// BEGIN inlined, specialised version of MC_(helperc_b_store8)
   2582    //// Set the origins for a+0 .. a+7
   2583    { OCacheLine* line;
   2584      UWord lineoff = oc_line_offset(a);
   2585      if (OC_ENABLE_ASSERTIONS) {
   2586         tl_assert(lineoff >= 0
   2587                   && lineoff < OC_W32S_PER_LINE -1/*'cos 8-aligned*/);
   2588      }
   2589      line = find_OCacheLine( a );
   2590      line->descr[lineoff+0] = 0xF;
   2591      line->descr[lineoff+1] = 0xF;
   2592      line->w32[lineoff+0]   = otag;
   2593      line->w32[lineoff+1]   = otag;
   2594    }
   2595    //// END inlined, specialised version of MC_(helperc_b_store8)
   2596 }
   2597 
   2598 
   2599 /*------------------------------------------------------------*/
   2600 /*--- Aligned fast case permission setters,                ---*/
   2601 /*--- for dealing with stacks                              ---*/
   2602 /*------------------------------------------------------------*/
   2603 
   2604 /*--------------------- 32-bit ---------------------*/
   2605 
   2606 /* Nb: by "aligned" here we mean 4-byte aligned */
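        /* Summary of the fast-path pattern used by make_aligned_word32_undefined
           and make_aligned_word32_noaccess below: when PERF_FAST_STACK2 is
           defined and the address is at or below MAX_PRIMARY_ADDRESS, the
           relevant vabits8 entry is written directly in the secondary map
           returned by get_secmap_for_writing_low(); otherwise they fall back to
           the generic make_mem_undefined / MC_(make_mem_noaccess) routines.
           The _w_otag variant layers origin-cache updates on top of the
           undefined case. */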
   2607 
   2608 static INLINE void make_aligned_word32_undefined ( Addr a )
   2609 {
   2610   PROF_EVENT(MCPE_MAKE_ALIGNED_WORD32_UNDEFINED);
   2611 
   2612 #ifndef PERF_FAST_STACK2
   2613    make_mem_undefined(a, 4);
   2614 #else
   2615    {
   2616       UWord   sm_off;
   2617       SecMap* sm;
   2618 
   2619       if (UNLIKELY(a > MAX_PRIMARY_ADDRESS)) {
   2620          PROF_EVENT(MCPE_MAKE_ALIGNED_WORD32_UNDEFINED_SLOW);
   2621          make_mem_undefined(a, 4);
   2622          return;
   2623       }
   2624 
   2625       sm                  = get_secmap_for_writing_low(a);
   2626       sm_off              = SM_OFF(a);
   2627       sm->vabits8[sm_off] = VA_BITS8_UNDEFINED;
   2628    }
   2629 #endif
   2630 }
   2631 
   2632 static INLINE
   2633 void make_aligned_word32_undefined_w_otag ( Addr a, UInt otag )
   2634 {
   2635    make_aligned_word32_undefined(a);
   2636    //// BEGIN inlined, specialised version of MC_(helperc_b_store4)
   2637    //// Set the origins for a+0 .. a+3
   2638    { OCacheLine* line;
   2639      UWord lineoff = oc_line_offset(a);
   2640      if (OC_ENABLE_ASSERTIONS) {
   2641         tl_assert(lineoff >= 0 && lineoff < OC_W32S_PER_LINE);
   2642      }
   2643      line = find_OCacheLine( a );
   2644      line->descr[lineoff] = 0xF;
   2645      line->w32[lineoff]   = otag;
   2646    }
   2647    //// END inlined, specialised version of MC_(helperc_b_store4)
   2648 }
   2649 
   2650 static INLINE
   2651 void make_aligned_word32_noaccess ( Addr a )
   2652 {
   2653    PROF_EVENT(MCPE_MAKE_ALIGNED_WORD32_NOACCESS);
   2654 
   2655 #ifndef PERF_FAST_STACK2
   2656    MC_(make_mem_noaccess)(a, 4);
   2657 #else
   2658    {
   2659       UWord   sm_off;
   2660       SecMap* sm;
   2661 
   2662       if (UNLIKELY(a > MAX_PRIMARY_ADDRESS)) {
   2663          PROF_EVENT(MCPE_MAKE_ALIGNED_WORD32_NOACCESS_SLOW);
   2664          MC_(make_mem_noaccess)(a, 4);
   2665          return;
   2666       }
   2667 
   2668       sm                  = get_secmap_for_writing_low(a);
   2669       sm_off              = SM_OFF(a);
   2670       sm->vabits8[sm_off] = VA_BITS8_NOACCESS;
   2671 
   2672       //// BEGIN inlined, specialised version of MC_(helperc_b_store4)
   2673       //// Set the origins for a+0 .. a+3.
   2674       if (UNLIKELY( MC_(clo_mc_level) == 3 )) {
   2675          OCacheLine* line;
   2676          UWord lineoff = oc_line_offset(a);
   2677          if (OC_ENABLE_ASSERTIONS) {
   2678             tl_assert(lineoff >= 0 && lineoff < OC_W32S_PER_LINE);
   2679          }
   2680          line = find_OCacheLine( a );
   2681          line->descr[lineoff] = 0;
   2682       }
   2683       //// END inlined, specialised version of MC_(helperc_b_store4)
   2684    }
   2685 #endif
   2686 }
   2687 
   2688 /*--------------------- 64-bit ---------------------*/
   2689 
   2690 /* Nb: by "aligned" here we mean 8-byte aligned */
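        /* The 64-bit setters below follow the same pattern as the 32-bit ones,
           except that they write a single 16-bit value (indexed by SM_OFF_16)
           into the vabits array: with 2 V+A bits tracked per client byte, one
           UShort store covers the whole 8-byte word at once. */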
   2691 
   2692 static INLINE void make_aligned_word64_undefined ( Addr a )
   2693 {
   2694    PROF_EVENT(MCPE_MAKE_ALIGNED_WORD64_UNDEFINED);
   2695 
   2696 #ifndef PERF_FAST_STACK2
   2697    make_mem_undefined(a, 8);
   2698 #else
   2699    {
   2700       UWord   sm_off16;
   2701       SecMap* sm;
   2702 
   2703       if (UNLIKELY(a > MAX_PRIMARY_ADDRESS)) {
   2704          PROF_EVENT(MCPE_MAKE_ALIGNED_WORD64_UNDEFINED_SLOW);
   2705          make_mem_undefined(a, 8);
   2706          return;
   2707       }
   2708 
   2709       sm       = get_secmap_for_writing_low(a);
   2710       sm_off16 = SM_OFF_16(a);
   2711       ((UShort*)(sm->vabits8))[sm_off16] = VA_BITS16_UNDEFINED;
   2712    }
   2713 #endif
   2714 }
   2715 
   2716 static INLINE
   2717 void make_aligned_word64_undefined_w_otag ( Addr a, UInt otag )
   2718 {
   2719    make_aligned_word64_undefined(a);
   2720    //// BEGIN inlined, specialised version of MC_(helperc_b_store8)
   2721    //// Set the origins for a+0 .. a+7
   2722    { OCacheLine* line;
   2723      UWord lineoff = oc_line_offset(a);
   2724      tl_assert(lineoff >= 0
   2725                && lineoff < OC_W32S_PER_LINE -1/*'cos 8-aligned*/);
   2726      line = find_OCacheLine( a );
   2727      line->descr[lineoff+0] = 0xF;
   2728      line->descr[lineoff+1] = 0xF;
   2729      line->w32[lineoff+0]   = otag;
   2730      line->w32[lineoff+1]   = otag;
   2731    }
   2732    //// END inlined, specialised version of MC_(helperc_b_store8)
   2733 }
   2734 
   2735 static INLINE
   2736 void make_aligned_word64_noaccess ( Addr a )
   2737 {
   2738    PROF_EVENT(MCPE_MAKE_ALIGNED_WORD64_NOACCESS);
   2739 
   2740 #ifndef PERF_FAST_STACK2
   2741    MC_(make_mem_noaccess)(a, 8);
   2742 #else
   2743    {
   2744       UWord   sm_off16;
   2745       SecMap* sm;
   2746 
   2747       if (UNLIKELY(a > MAX_PRIMARY_ADDRESS)) {
   2748          PROF_EVENT(MCPE_MAKE_ALIGNED_WORD64_NOACCESS_SLOW);
   2749          MC_(make_mem_noaccess)(a, 8);
   2750          return;
   2751       }
   2752 
   2753       sm       = get_secmap_for_writing_low(a);
   2754       sm_off16 = SM_OFF_16(a);
   2755       ((UShort*)(sm->vabits8))[sm_off16] = VA_BITS16_NOACCESS;
   2756 
   2757       //// BEGIN inlined, specialised version of MC_(helperc_b_store8)
   2758       //// Clear the origins for a+0 .. a+7.
   2759       if (UNLIKELY( MC_(clo_mc_level) == 3 )) {
   2760          OCacheLine* line;
   2761          UWord lineoff = oc_line_offset(a);
   2762          tl_assert(lineoff >= 0
   2763                    && lineoff < OC_W32S_PER_LINE -1/*'cos 8-aligned*/);
   2764          line = find_OCacheLine( a );
   2765          line->descr[lineoff+0] = 0;
   2766          line->descr[lineoff+1] = 0;
   2767       }
   2768       //// END inlined, specialised version of MC_(helperc_b_store8)
   2769    }
   2770 #endif
   2771 }
   2772 
   2773 
   2774 /*------------------------------------------------------------*/
   2775 /*--- Stack pointer adjustment                             ---*/
   2776 /*------------------------------------------------------------*/
   2777 
   2778 #ifdef PERF_FAST_STACK
   2779 #  define MAYBE_USED
   2780 #else
   2781 #  define MAYBE_USED __attribute__((unused))
   2782 #endif
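        /* MAYBE_USED exists because, when PERF_FAST_STACK is not defined, the
           specialised per-size stack handlers below are presumably not
           registered as stack-change callbacks, so __attribute__((unused))
           silences unused-function warnings for them.  When PERF_FAST_STACK is
           defined the macro expands to nothing. */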
   2783 
   2784 /*--------------- adjustment by 4 bytes ---------------*/
   2785 
   2786 MAYBE_USED
   2787 static void VG_REGPARM(2) mc_new_mem_stack_4_w_ECU(Addr new_SP, UInt ecu)
   2788 {
   2789    UInt otag = ecu | MC_OKIND_STACK;
   2790    PROF_EVENT(MCPE_NEW_MEM_STACK_4);
   2791    if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
   2792       make_aligned_word32_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP, otag );
   2793    } else {
   2794       MC_(make_mem_undefined_w_otag) ( -VG_STACK_REDZONE_SZB + new_SP, 4, otag );
   2795    }
   2796 }
   2797 
   2798 MAYBE_USED
   2799 static void VG_REGPARM(1) mc_new_mem_stack_4(Addr new_SP)
   2800 {
   2801    PROF_EVENT(MCPE_NEW_MEM_STACK_4);
   2802    if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
   2803       make_aligned_word32_undefined ( -VG_STACK_REDZONE_SZB + new_SP );
   2804    } else {
   2805       make_mem_undefined ( -VG_STACK_REDZONE_SZB + new_SP, 4 );
   2806    }
   2807 }
   2808 
   2809 MAYBE_USED
   2810 static void VG_REGPARM(1) mc_die_mem_stack_4(Addr new_SP)
   2811 {
   2812    PROF_EVENT(MCPE_DIE_MEM_STACK_4);
   2813    if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
   2814       make_aligned_word32_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-4 );
   2815    } else {
   2816       MC_(make_mem_noaccess) ( -VG_STACK_REDZONE_SZB + new_SP-4, 4 );
   2817    }
   2818 }
   2819 
   2820 /*--------------- adjustment by 8 bytes ---------------*/
   2821 
   2822 MAYBE_USED
   2823 static void VG_REGPARM(2) mc_new_mem_stack_8_w_ECU(Addr new_SP, UInt ecu)
   2824 {
   2825    UInt otag = ecu | MC_OKIND_STACK;
   2826    PROF_EVENT(MCPE_NEW_MEM_STACK_8);
   2827    if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
   2828       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP, otag );
   2829    } else if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
   2830       make_aligned_word32_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP  , otag );
   2831       make_aligned_word32_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+4, otag );
   2832    } else {
   2833       MC_(make_mem_undefined_w_otag) ( -VG_STACK_REDZONE_SZB + new_SP, 8, otag );
   2834    }
   2835 }
   2836 
   2837 MAYBE_USED
   2838 static void VG_REGPARM(1) mc_new_mem_stack_8(Addr new_SP)
   2839 {
   2840    PROF_EVENT(MCPE_NEW_MEM_STACK_8);
   2841    if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
   2842       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP );
   2843    } else if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
   2844       make_aligned_word32_undefined ( -VG_STACK_REDZONE_SZB + new_SP );
   2845       make_aligned_word32_undefined ( -VG_STACK_REDZONE_SZB + new_SP+4 );
   2846    } else {
   2847       make_mem_undefined ( -VG_STACK_REDZONE_SZB + new_SP, 8 );
   2848    }
   2849 }
   2850 
   2851 MAYBE_USED
   2852 static void VG_REGPARM(1) mc_die_mem_stack_8(Addr new_SP)
   2853 {
   2854    PROF_EVENT(MCPE_DIE_MEM_STACK_8);
   2855    if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
   2856       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-8 );
   2857    } else if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
   2858       make_aligned_word32_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-8 );
   2859       make_aligned_word32_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-4 );
   2860    } else {
   2861       MC_(make_mem_noaccess) ( -VG_STACK_REDZONE_SZB + new_SP-8, 8 );
   2862    }
   2863 }
   2864 
   2865 /*--------------- adjustment by 12 bytes ---------------*/
   2866 
   2867 MAYBE_USED
   2868 static void VG_REGPARM(2) mc_new_mem_stack_12_w_ECU(Addr new_SP, UInt ecu)
   2869 {
   2870    UInt otag = ecu | MC_OKIND_STACK;
   2871    PROF_EVENT(MCPE_NEW_MEM_STACK_12);
   2872    if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
   2873       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP  , otag );
   2874       make_aligned_word32_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+8, otag );
   2875    } else if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
   2876       /* From the previous test we know there is no 8-alignment at
   2877          offset +0, hence there must be 8-alignment at offsets +4/-4.
   2878          Hence it is safe to do 4 bytes at +0 and then 8 bytes at +4. */
   2879       make_aligned_word32_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP  , otag );
   2880       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+4, otag );
   2881    } else {
   2882       MC_(make_mem_undefined_w_otag) ( -VG_STACK_REDZONE_SZB + new_SP, 12, otag );
   2883    }
   2884 }
   2885 
   2886 MAYBE_USED
   2887 static void VG_REGPARM(1) mc_new_mem_stack_12(Addr new_SP)
   2888 {
   2889    PROF_EVENT(MCPE_NEW_MEM_STACK_12);
   2890    if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
   2891       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP );
   2892       make_aligned_word32_undefined ( -VG_STACK_REDZONE_SZB + new_SP+8 );
   2893    } else if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
   2894       /* From the previous test we know there is no 8-alignment at
   2895          offset +0, hence there must be 8-alignment at offsets +4/-4.
   2896          Hence it is safe to do 4 bytes at +0 and then 8 bytes at +4. */
   2897       make_aligned_word32_undefined ( -VG_STACK_REDZONE_SZB + new_SP );
   2898       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+4 );
   2899    } else {
   2900       make_mem_undefined ( -VG_STACK_REDZONE_SZB + new_SP, 12 );
   2901    }
   2902 }
   2903 
   2904 MAYBE_USED
   2905 static void VG_REGPARM(1) mc_die_mem_stack_12(Addr new_SP)
   2906 {
   2907    PROF_EVENT(MCPE_DIE_MEM_STACK_12);
   2908    /* Note the -12 in the test */
   2909    if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP-12 )) {
   2910       /* We have 8-alignment at -12, hence ok to do 8 at -12 and 4 at
   2911          -4. */
   2912       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-12 );
   2913       make_aligned_word32_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-4  );
   2914    } else if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
   2915       /* We have 4-alignment at +0, but we don't have 8-alignment at
   2916          -12.  So we must have 8-alignment at -8.  Hence do 4 at -12
   2917          and then 8 at -8. */
   2918       make_aligned_word32_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-12 );
   2919       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-8  );
   2920    } else {
   2921       MC_(make_mem_noaccess) ( -VG_STACK_REDZONE_SZB + new_SP-12, 12 );
   2922    }
   2923 }
   2924 
   2925 /*--------------- adjustment by 16 bytes ---------------*/
   2926 
   2927 MAYBE_USED
   2928 static void VG_REGPARM(2) mc_new_mem_stack_16_w_ECU(Addr new_SP, UInt ecu)
   2929 {
   2930    UInt otag = ecu | MC_OKIND_STACK;
   2931    PROF_EVENT(MCPE_NEW_MEM_STACK_16);
   2932    if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
   2933       /* Have 8-alignment at +0, hence do 8 at +0 and 8 at +8. */
   2934       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP  , otag );
   2935       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+8, otag );
   2936    } else if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
   2937       /* Have 4 alignment at +0 but not 8; hence 8 must be at +4.
   2938          Hence do 4 at +0, 8 at +4, 4 at +12. */
   2939       make_aligned_word32_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP   , otag );
   2940       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+4 , otag );
   2941       make_aligned_word32_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+12, otag );
   2942    } else {
   2943       MC_(make_mem_undefined_w_otag) ( -VG_STACK_REDZONE_SZB + new_SP, 16, otag );
   2944    }
   2945 }
   2946 
   2947 MAYBE_USED
   2948 static void VG_REGPARM(1) mc_new_mem_stack_16(Addr new_SP)
   2949 {
   2950    PROF_EVENT(MCPE_NEW_MEM_STACK_16);
   2951    if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
   2952       /* Have 8-alignment at +0, hence do 8 at +0 and 8 at +8. */
   2953       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP );
   2954       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+8 );
   2955    } else if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
   2956       /* Have 4 alignment at +0 but not 8; hence 8 must be at +4.
   2957          Hence do 4 at +0, 8 at +4, 4 at +12. */
   2958       make_aligned_word32_undefined ( -VG_STACK_REDZONE_SZB + new_SP );
   2959       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+4  );
   2960       make_aligned_word32_undefined ( -VG_STACK_REDZONE_SZB + new_SP+12 );
   2961    } else {
   2962       make_mem_undefined ( -VG_STACK_REDZONE_SZB + new_SP, 16 );
   2963    }
   2964 }
   2965 
   2966 MAYBE_USED
   2967 static void VG_REGPARM(1) mc_die_mem_stack_16(Addr new_SP)
   2968 {
   2969    PROF_EVENT(MCPE_DIE_MEM_STACK_16);
   2970    if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
   2971       /* Have 8-alignment at +0, hence do 8 at -16 and 8 at -8. */
   2972       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-16 );
   2973       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-8  );
   2974    } else if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
   2975       /* 8 alignment must be at -12.  Do 4 at -16, 8 at -12, 4 at -4. */
   2976       make_aligned_word32_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-16 );
   2977       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-12 );
   2978       make_aligned_word32_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-4  );
   2979    } else {
   2980       MC_(make_mem_noaccess) ( -VG_STACK_REDZONE_SZB + new_SP-16, 16 );
   2981    }
   2982 }
   2983 
   2984 /*--------------- adjustment by 32 bytes ---------------*/
   2985 
   2986 MAYBE_USED
   2987 static void VG_REGPARM(2) mc_new_mem_stack_32_w_ECU(Addr new_SP, UInt ecu)
   2988 {
   2989    UInt otag = ecu | MC_OKIND_STACK;
   2990    PROF_EVENT(MCPE_NEW_MEM_STACK_32);
   2991    if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
   2992       /* Straightforward */
   2993       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP   , otag );
   2994       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+8 , otag );
   2995       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+16, otag );
   2996       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+24, otag );
   2997    } else if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
   2998       /* 8 alignment must be at +4.  Hence do 8 at +4,+12,+20 and 4 at
   2999          +0,+28. */
   3000       make_aligned_word32_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP   , otag );
   3001       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+4 , otag );
   3002       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+12, otag );
   3003       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+20, otag );
   3004       make_aligned_word32_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+28, otag );
   3005    } else {
   3006       MC_(make_mem_undefined_w_otag) ( -VG_STACK_REDZONE_SZB + new_SP, 32, otag );
   3007    }
   3008 }
   3009 
   3010 MAYBE_USED
   3011 static void VG_REGPARM(1) mc_new_mem_stack_32(Addr new_SP)
   3012 {
   3013    PROF_EVENT(MCPE_NEW_MEM_STACK_32);
   3014    if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
   3015       /* Straightforward */
   3016       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP );
   3017       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+8 );
   3018       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+16 );
   3019       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+24 );
   3020    } else if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
   3021       /* 8 alignment must be at +4.  Hence do 8 at +4,+12,+20 and 4 at
   3022          +0,+28. */
   3023       make_aligned_word32_undefined ( -VG_STACK_REDZONE_SZB + new_SP );
   3024       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+4 );
   3025       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+12 );
   3026       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+20 );
   3027       make_aligned_word32_undefined ( -VG_STACK_REDZONE_SZB + new_SP+28 );
   3028    } else {
   3029       make_mem_undefined ( -VG_STACK_REDZONE_SZB + new_SP, 32 );
   3030    }
   3031 }
   3032 
   3033 MAYBE_USED
   3034 static void VG_REGPARM(1) mc_die_mem_stack_32(Addr new_SP)
   3035 {
   3036    PROF_EVENT(MCPE_DIE_MEM_STACK_32);
   3037    if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
   3038       /* Straightforward */
   3039       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-32 );
   3040       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-24 );
   3041       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-16 );
   3042       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP- 8 );
   3043    } else if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
   3044       /* 8 alignment must be at -4 etc.  Hence do 8 at -12,-20,-28 and
   3045          4 at -32,-4. */
   3046       make_aligned_word32_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-32 );
   3047       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-28 );
   3048       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-20 );
   3049       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-12 );
   3050       make_aligned_word32_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-4  );
   3051    } else {
   3052       MC_(make_mem_noaccess) ( -VG_STACK_REDZONE_SZB + new_SP-32, 32 );
   3053    }
   3054 }
   3055 
   3056 /*--------------- adjustment by 112 bytes ---------------*/
   3057 
   3058 MAYBE_USED
   3059 static void VG_REGPARM(2) mc_new_mem_stack_112_w_ECU(Addr new_SP, UInt ecu)
   3060 {
   3061    UInt otag = ecu | MC_OKIND_STACK;
   3062    PROF_EVENT(MCPE_NEW_MEM_STACK_112);
   3063    if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
   3064       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP   , otag );
   3065       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+8 , otag );
   3066       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+16, otag );
   3067       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+24, otag );
   3068       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+32, otag );
   3069       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+40, otag );
   3070       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+48, otag );
   3071       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+56, otag );
   3072       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+64, otag );
   3073       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+72, otag );
   3074       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+80, otag );
   3075       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+88, otag );
   3076       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+96, otag );
   3077       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+104, otag );
   3078    } else {
   3079       MC_(make_mem_undefined_w_otag) ( -VG_STACK_REDZONE_SZB + new_SP, 112, otag );
   3080    }
   3081 }
   3082 
   3083 MAYBE_USED
   3084 static void VG_REGPARM(1) mc_new_mem_stack_112(Addr new_SP)
   3085 {
   3086    PROF_EVENT(MCPE_NEW_MEM_STACK_112);
   3087    if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
   3088       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP );
   3089       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+8 );
   3090       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+16 );
   3091       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+24 );
   3092       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+32 );
   3093       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+40 );
   3094       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+48 );
   3095       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+56 );
   3096       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+64 );
   3097       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+72 );
   3098       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+80 );
   3099       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+88 );
   3100       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+96 );
   3101       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+104 );
   3102    } else {
   3103       make_mem_undefined ( -VG_STACK_REDZONE_SZB + new_SP, 112 );
   3104    }
   3105 }
   3106 
   3107 MAYBE_USED
   3108 static void VG_REGPARM(1) mc_die_mem_stack_112(Addr new_SP)
   3109 {
   3110    PROF_EVENT(MCPE_DIE_MEM_STACK_112);
   3111    if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
   3112       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-112);
   3113       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-104);
   3114       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-96 );
   3115       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-88 );
   3116       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-80 );
   3117       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-72 );
   3118       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-64 );
   3119       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-56 );
   3120       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-48 );
   3121       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-40 );
   3122       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-32 );
   3123       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-24 );
   3124       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-16 );
   3125       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP- 8 );
   3126    } else {
   3127       MC_(make_mem_noaccess) ( -VG_STACK_REDZONE_SZB + new_SP-112, 112 );
   3128    }
   3129 }
   3130 
   3131 /*--------------- adjustment by 128 bytes ---------------*/
   3132 
   3133 MAYBE_USED
   3134 static void VG_REGPARM(2) mc_new_mem_stack_128_w_ECU(Addr new_SP, UInt ecu)
   3135 {
   3136    UInt otag = ecu | MC_OKIND_STACK;
   3137    PROF_EVENT(MCPE_NEW_MEM_STACK_128);
   3138    if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
   3139       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP   , otag );
   3140       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+8 , otag );
   3141       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+16, otag );
   3142       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+24, otag );
   3143       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+32, otag );
   3144       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+40, otag );
   3145       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+48, otag );
   3146       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+56, otag );
   3147       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+64, otag );
   3148       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+72, otag );
   3149       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+80, otag );
   3150       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+88, otag );
   3151       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+96, otag );
   3152       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+104, otag );
   3153       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+112, otag );
   3154       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+120, otag );
   3155    } else {
   3156       MC_(make_mem_undefined_w_otag) ( -VG_STACK_REDZONE_SZB + new_SP, 128, otag );
   3157    }
   3158 }
   3159 
   3160 MAYBE_USED
   3161 static void VG_REGPARM(1) mc_new_mem_stack_128(Addr new_SP)
   3162 {
   3163    PROF_EVENT(MCPE_NEW_MEM_STACK_128);
   3164    if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
   3165       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP );
   3166       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+8 );
   3167       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+16 );
   3168       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+24 );
   3169       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+32 );
   3170       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+40 );
   3171       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+48 );
   3172       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+56 );
   3173       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+64 );
   3174       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+72 );
   3175       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+80 );
   3176       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+88 );
   3177       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+96 );
   3178       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+104 );
   3179       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+112 );
   3180       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+120 );
   3181    } else {
   3182       make_mem_undefined ( -VG_STACK_REDZONE_SZB + new_SP, 128 );
   3183    }
   3184 }
   3185 
   3186 MAYBE_USED
   3187 static void VG_REGPARM(1) mc_die_mem_stack_128(Addr new_SP)
   3188 {
   3189    PROF_EVENT(MCPE_DIE_MEM_STACK_128);
   3190    if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
   3191       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-128);
   3192       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-120);
   3193       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-112);
   3194       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-104);
   3195       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-96 );
   3196       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-88 );
   3197       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-80 );
   3198       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-72 );
   3199       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-64 );
   3200       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-56 );
   3201       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-48 );
   3202       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-40 );
   3203       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-32 );
   3204       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-24 );
   3205       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-16 );
   3206       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP- 8 );
   3207    } else {
   3208       MC_(make_mem_noaccess) ( -VG_STACK_REDZONE_SZB + new_SP-128, 128 );
   3209    }
   3210 }
   3211 
   3212 /*--------------- adjustment by 144 bytes ---------------*/
   3213 
   3214 MAYBE_USED
   3215 static void VG_REGPARM(2) mc_new_mem_stack_144_w_ECU(Addr new_SP, UInt ecu)
   3216 {
   3217    UInt otag = ecu | MC_OKIND_STACK;
   3218    PROF_EVENT(MCPE_NEW_MEM_STACK_144);
   3219    if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
   3220       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP,     otag );
   3221       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+8,   otag );
   3222       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+16,  otag );
   3223       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+24,  otag );
   3224       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+32,  otag );
   3225       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+40,  otag );
   3226       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+48,  otag );
   3227       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+56,  otag );
   3228       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+64,  otag );
   3229       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+72,  otag );
   3230       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+80,  otag );
   3231       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+88,  otag );
   3232       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+96,  otag );
   3233       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+104, otag );
   3234       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+112, otag );
   3235       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+120, otag );
   3236       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+128, otag );
   3237       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+136, otag );
   3238    } else {
   3239       MC_(make_mem_undefined_w_otag) ( -VG_STACK_REDZONE_SZB + new_SP, 144, otag );
   3240    }
   3241 }
   3242 
   3243 MAYBE_USED
   3244 static void VG_REGPARM(1) mc_new_mem_stack_144(Addr new_SP)
   3245 {
   3246    PROF_EVENT(MCPE_NEW_MEM_STACK_144);
   3247    if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
   3248       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP );
   3249       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+8 );
   3250       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+16 );
   3251       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+24 );
   3252       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+32 );
   3253       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+40 );
   3254       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+48 );
   3255       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+56 );
   3256       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+64 );
   3257       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+72 );
   3258       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+80 );
   3259       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+88 );
   3260       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+96 );
   3261       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+104 );
   3262       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+112 );
   3263       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+120 );
   3264       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+128 );
   3265       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+136 );
   3266    } else {
   3267       make_mem_undefined ( -VG_STACK_REDZONE_SZB + new_SP, 144 );
   3268    }
   3269 }
   3270 
   3271 MAYBE_USED
   3272 static void VG_REGPARM(1) mc_die_mem_stack_144(Addr new_SP)
   3273 {
   3274    PROF_EVENT(MCPE_DIE_MEM_STACK_144);
   3275    if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
   3276       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-144);
   3277       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-136);
   3278       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-128);
   3279       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-120);
   3280       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-112);
   3281       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-104);
   3282       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-96 );
   3283       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-88 );
   3284       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-80 );
   3285       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-72 );
   3286       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-64 );
   3287       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-56 );
   3288       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-48 );
   3289       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-40 );
   3290       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-32 );
   3291       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-24 );
   3292       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-16 );
   3293       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP- 8 );
   3294    } else {
   3295       MC_(make_mem_noaccess) ( -VG_STACK_REDZONE_SZB + new_SP-144, 144 );
   3296    }
   3297 }
   3298 
   3299 /*--------------- adjustment by 160 bytes ---------------*/
   3300 
   3301 MAYBE_USED
   3302 static void VG_REGPARM(2) mc_new_mem_stack_160_w_ECU(Addr new_SP, UInt ecu)
   3303 {
   3304    UInt otag = ecu | MC_OKIND_STACK;
   3305    PROF_EVENT(MCPE_NEW_MEM_STACK_160);
   3306    if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
   3307       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP,     otag );
   3308       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+8,   otag );
   3309       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+16,  otag );
   3310       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+24,  otag );
   3311       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+32,  otag );
   3312       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+40,  otag );
   3313       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+48,  otag );
   3314       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+56,  otag );
   3315       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+64,  otag );
   3316       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+72,  otag );
   3317       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+80,  otag );
   3318       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+88,  otag );
   3319       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+96,  otag );
   3320       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+104, otag );
   3321       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+112, otag );
   3322       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+120, otag );
   3323       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+128, otag );
   3324       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+136, otag );
   3325       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+144, otag );
   3326       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+152, otag );
   3327    } else {
   3328       MC_(make_mem_undefined_w_otag) ( -VG_STACK_REDZONE_SZB + new_SP, 160, otag );
   3329    }
   3330 }
   3331 
   3332 MAYBE_USED
   3333 static void VG_REGPARM(1) mc_new_mem_stack_160(Addr new_SP)
   3334 {
   3335    PROF_EVENT(MCPE_NEW_MEM_STACK_160);
   3336    if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
   3337       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP );
   3338       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+8 );
   3339       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+16 );
   3340       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+24 );
   3341       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+32 );
   3342       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+40 );
   3343       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+48 );
   3344       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+56 );
   3345       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+64 );
   3346       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+72 );
   3347       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+80 );
   3348       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+88 );
   3349       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+96 );
   3350       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+104 );
   3351       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+112 );
   3352       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+120 );
   3353       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+128 );
   3354       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+136 );
   3355       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+144 );
   3356       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+152 );
   3357    } else {
   3358       make_mem_undefined ( -VG_STACK_REDZONE_SZB + new_SP, 160 );
   3359    }
   3360 }
   3361 
   3362 MAYBE_USED
   3363 static void VG_REGPARM(1) mc_die_mem_stack_160(Addr new_SP)
   3364 {
   3365    PROF_EVENT(MCPE_DIE_MEM_STACK_160);
   3366    if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
   3367       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-160);
   3368       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-152);
   3369       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-144);
   3370       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-136);
   3371       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-128);
   3372       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-120);
   3373       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-112);
   3374       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-104);
   3375       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-96 );
   3376       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-88 );
   3377       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-80 );
   3378       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-72 );
   3379       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-64 );
   3380       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-56 );
   3381       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-48 );
   3382       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-40 );
   3383       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-32 );
   3384       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-24 );
   3385       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-16 );
   3386       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP- 8 );
   3387    } else {
   3388       MC_(make_mem_noaccess) ( -VG_STACK_REDZONE_SZB + new_SP-160, 160 );
   3389    }
   3390 }
   3391 
   3392 /*--------------- adjustment by N bytes ---------------*/
   3393 
   3394 static void mc_new_mem_stack_w_ECU ( Addr a, SizeT len, UInt ecu )
   3395 {
   3396    UInt otag = ecu | MC_OKIND_STACK;
   3397    PROF_EVENT(MCPE_NEW_MEM_STACK);
   3398    MC_(make_mem_undefined_w_otag) ( -VG_STACK_REDZONE_SZB + a, len, otag );
   3399 }
   3400 
   3401 static void mc_new_mem_stack ( Addr a, SizeT len )
   3402 {
   3403    PROF_EVENT(MCPE_NEW_MEM_STACK);
   3404    make_mem_undefined ( -VG_STACK_REDZONE_SZB + a, len );
   3405 }
   3406 
   3407 static void mc_die_mem_stack ( Addr a, SizeT len )
   3408 {
   3409    PROF_EVENT(MCPE_DIE_MEM_STACK);
   3410    MC_(make_mem_noaccess) ( -VG_STACK_REDZONE_SZB + a, len );
   3411 }
   3412 
   3413 
   3414 /* The AMD64 ABI says:
   3415 
   3416    "The 128-byte area beyond the location pointed to by %rsp is considered
   3417     to be reserved and shall not be modified by signal or interrupt
   3418     handlers.  Therefore, functions may use this area for temporary data
   3419     that is not needed across function calls.  In particular, leaf functions
   3420     may use this area for their entire stack frame, rather than adjusting
   3421     the stack pointer in the prologue and epilogue.  This area is known as
   3422     red zone [sic]."
   3423 
   3424    So after any call or return we need to mark this redzone as containing
   3425    undefined values.
   3426 
   3427    Consider this:  we're in function f.  f calls g.  g moves rsp down
   3428    modestly (say 16 bytes) and writes stuff all over the red zone, making it
   3429    defined.  g returns.  f is buggy and reads from parts of the red zone
   3430    that it didn't write to.  But because g filled that area in, f is going
   3431    to pick up defined V bits, so any errors from reading bits of the red
   3432    zone it didn't write will be missed.  The only solution I could
   3433    think of was to make the red zone undefined when g returns to f.
   3434 
   3435    This is in accordance with the ABI, which makes it clear the redzone
   3436    is volatile across function calls.
   3437 
   3438    The problem occurs the other way round too: f could fill the RZ up
   3439    with defined values and g could mistakenly read them.  So the RZ
   3440    also needs to be nuked on function calls.
   3441 */
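        /* Illustrative only: a leaf function such as
              static long g ( long x ) { long t = x; return t + 1; }
           may be compiled to keep 't' in the red zone below %rsp, without
           adjusting %rsp at all.  After g returns those bytes still carry
           defined V bits, so a buggy read of them in the caller would go
           unreported unless the red zone is re-marked undefined around the
           call/return, as discussed above. */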
   3442 
   3443 
   3444 /* Here's a simple cache to hold nia -> ECU mappings.  It could be
   3445    improved so as to have a lower miss rate. */
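        /* Sketch of how the cache below behaves (see convert_nia_to_ecu):
           each of the N_NIA_TO_ECU_CACHE slots holds two (nia, ecu) pairs.
           A hit on the second pair swaps it to the front; a miss computes the
           ECU with VG_(make_depth_1_ExeContext_from_Addr) and installs it in
           the front pair, demoting the old front pair to the back.  Each slot
           is therefore a tiny 2-entry LRU. */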
   3446 
   3447 static UWord stats__nia_cache_queries = 0;
   3448 static UWord stats__nia_cache_misses  = 0;
   3449 
   3450 typedef
   3451    struct { UWord nia0; UWord ecu0;   /* nia0 maps to ecu0 */
   3452             UWord nia1; UWord ecu1; } /* nia1 maps to ecu1 */
   3453    WCacheEnt;
   3454 
   3455 #define N_NIA_TO_ECU_CACHE 511
   3456 
   3457 static WCacheEnt nia_to_ecu_cache[N_NIA_TO_ECU_CACHE];
   3458 
   3459 static void init_nia_to_ecu_cache ( void )
   3460 {
   3461    UWord       i;
   3462    Addr        zero_addr = 0;
   3463    ExeContext* zero_ec;
   3464    UInt        zero_ecu;
   3465    /* Fill all the slots with an entry for address zero, and the
   3466       corresponding ECU.  Hence the cache is initially filled with
   3467       valid data. */
   3468    zero_ec = VG_(make_depth_1_ExeContext_from_Addr)(zero_addr);
   3469    tl_assert(zero_ec);
   3470    zero_ecu = VG_(get_ECU_from_ExeContext)(zero_ec);
   3471    tl_assert(VG_(is_plausible_ECU)(zero_ecu));
   3472    for (i = 0; i < N_NIA_TO_ECU_CACHE; i++) {
   3473       nia_to_ecu_cache[i].nia0 = zero_addr;
   3474       nia_to_ecu_cache[i].ecu0 = zero_ecu;
   3475       nia_to_ecu_cache[i].nia1 = zero_addr;
   3476       nia_to_ecu_cache[i].ecu1 = zero_ecu;
   3477    }
   3478 }
   3479 
   3480 static inline UInt convert_nia_to_ecu ( Addr nia )
   3481 {
   3482    UWord i;
   3483    UInt        ecu;
   3484    ExeContext* ec;
   3485 
   3486    tl_assert( sizeof(nia_to_ecu_cache[0].nia1) == sizeof(nia) );
   3487 
   3488    stats__nia_cache_queries++;
   3489    i = nia % N_NIA_TO_ECU_CACHE;
   3490    tl_assert(i >= 0 && i < N_NIA_TO_ECU_CACHE);
   3491 
   3492    if (LIKELY( nia_to_ecu_cache[i].nia0 == nia ))
   3493       return nia_to_ecu_cache[i].ecu0;
   3494 
   3495    if (LIKELY( nia_to_ecu_cache[i].nia1 == nia )) {
   3496 #     define SWAP(_w1,_w2) { UWord _t = _w1; _w1 = _w2; _w2 = _t; }
   3497       SWAP( nia_to_ecu_cache[i].nia0, nia_to_ecu_cache[i].nia1 );
   3498       SWAP( nia_to_ecu_cache[i].ecu0, nia_to_ecu_cache[i].ecu1 );
   3499 #     undef SWAP
   3500       return nia_to_ecu_cache[i].ecu0;
   3501    }
   3502 
   3503    stats__nia_cache_misses++;
   3504    ec = VG_(make_depth_1_ExeContext_from_Addr)(nia);
   3505    tl_assert(ec);
   3506    ecu = VG_(get_ECU_from_ExeContext)(ec);
   3507    tl_assert(VG_(is_plausible_ECU)(ecu));
   3508 
   3509    nia_to_ecu_cache[i].nia1 = nia_to_ecu_cache[i].nia0;
   3510    nia_to_ecu_cache[i].ecu1 = nia_to_ecu_cache[i].ecu0;
   3511 
   3512    nia_to_ecu_cache[i].nia0 = nia;
   3513    nia_to_ecu_cache[i].ecu0 = (UWord)ecu;
   3514    return ecu;
   3515 }
   3516 
   3517 
   3518 /* Note that this serves both the origin-tracking and
   3519    no-origin-tracking modes.  We assume that calls to it are
   3520    sufficiently infrequent that it isn't worth specialising for the
   3521    with/without origin-tracking cases. */
   3522 void MC_(helperc_MAKE_STACK_UNINIT) ( Addr base, UWord len, Addr nia )
   3523 {
   3524    UInt otag;
   3525    tl_assert(sizeof(UWord) == sizeof(SizeT));
   3526    if (0)
   3527       VG_(printf)("helperc_MAKE_STACK_UNINIT (%#lx,%lu,nia=%#lx)\n",
   3528                   base, len, nia );
   3529 
   3530    if (UNLIKELY( MC_(clo_mc_level) == 3 )) {
   3531       UInt ecu = convert_nia_to_ecu ( nia );
   3532       tl_assert(VG_(is_plausible_ECU)(ecu));
   3533       otag = ecu | MC_OKIND_STACK;
   3534    } else {
   3535       tl_assert(nia == 0);
   3536       otag = 0;
   3537    }
   3538 
   3539 #  if 0
   3540    /* Really slow version */
   3541    MC_(make_mem_undefined_w_otag)(base, len, otag);
   3542 #  endif
   3543 
   3544 #  if 0
   3545    /* Slow(ish) version, which is fairly easily seen to be correct.
   3546    */
   3547    if (LIKELY( VG_IS_8_ALIGNED(base) && len==128 )) {
   3548       make_aligned_word64_undefined_w_otag(base +   0, otag);
   3549       make_aligned_word64_undefined_w_otag(base +   8, otag);
   3550       make_aligned_word64_undefined_w_otag(base +  16, otag);
   3551       make_aligned_word64_undefined_w_otag(base +  24, otag);
   3552 
   3553       make_aligned_word64_undefined_w_otag(base +  32, otag);
   3554       make_aligned_word64_undefined_w_otag(base +  40, otag);
   3555       make_aligned_word64_undefined_w_otag(base +  48, otag);
   3556       make_aligned_word64_undefined_w_otag(base +  56, otag);
   3557 
   3558       make_aligned_word64_undefined_w_otag(base +  64, otag);
   3559       make_aligned_word64_undefined_w_otag(base +  72, otag);
   3560       make_aligned_word64_undefined_w_otag(base +  80, otag);
   3561       make_aligned_word64_undefined_w_otag(base +  88, otag);
   3562 
   3563       make_aligned_word64_undefined_w_otag(base +  96, otag);
   3564       make_aligned_word64_undefined_w_otag(base + 104, otag);
   3565       make_aligned_word64_undefined_w_otag(base + 112, otag);
   3566       make_aligned_word64_undefined_w_otag(base + 120, otag);
   3567    } else {
   3568       MC_(make_mem_undefined_w_otag)(base, len, otag);
   3569    }
   3570 #  endif
   3571 
   3572    /* Idea is: go fast when
   3573          * the base is 8-aligned and the length is 128
   3574          * the sm is available in the main primary map
   3575          * the address range falls entirely within a single secondary map
   3576       If all those conditions hold, just update the V+A bits by writing
   3577       directly into the vabits array.  (If the sm was distinguished, this
   3578       will make a copy and then write to it.)
   3579    */
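           /* Arithmetic behind the 128-byte fast path below: each 16-bit write
              of VA_BITS16_UNDEFINED covers 8 bytes of client address space
              (2 V+A bits per byte), so the sixteen stores p[0]..p[15] cover
              exactly 16 * 8 == 128 bytes, matching the length test. */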
   3580 
   3581    if (LIKELY( len == 128 && VG_IS_8_ALIGNED(base) )) {
   3582       /* Now we know the address range is suitably sized and aligned. */
   3583       UWord a_lo = (UWord)(base);
   3584       UWord a_hi = (UWord)(base + 128 - 1);
   3585       tl_assert(a_lo < a_hi);             // paranoia: detect overflow
   3586       if (a_hi <= MAX_PRIMARY_ADDRESS) {
   3587          // Now we know the entire range is within the main primary map.
   3588          SecMap* sm    = get_secmap_for_writing_low(a_lo);
   3589          SecMap* sm_hi = get_secmap_for_writing_low(a_hi);
   3590          /* Now we know that the entire address range falls within a
   3591             single secondary map, and that that secondary 'lives' in
   3592             the main primary map. */
   3593          if (LIKELY(sm == sm_hi)) {
   3594             // Finally, we know that the range is entirely within one secmap.
   3595             UWord   v_off = SM_OFF(a_lo);
   3596             UShort* p     = (UShort*)(&sm->vabits8[v_off]);
   3597             p[ 0] = VA_BITS16_UNDEFINED;
   3598             p[ 1] = VA_BITS16_UNDEFINED;
   3599             p[ 2] = VA_BITS16_UNDEFINED;
   3600             p[ 3] = VA_BITS16_UNDEFINED;
   3601             p[ 4] = VA_BITS16_UNDEFINED;
   3602             p[ 5] = VA_BITS16_UNDEFINED;
   3603             p[ 6] = VA_BITS16_UNDEFINED;
   3604             p[ 7] = VA_BITS16_UNDEFINED;
   3605             p[ 8] = VA_BITS16_UNDEFINED;
   3606             p[ 9] = VA_BITS16_UNDEFINED;
   3607             p[10] = VA_BITS16_UNDEFINED;
   3608             p[11] = VA_BITS16_UNDEFINED;
   3609             p[12] = VA_BITS16_UNDEFINED;
   3610             p[13] = VA_BITS16_UNDEFINED;
   3611             p[14] = VA_BITS16_UNDEFINED;
   3612             p[15] = VA_BITS16_UNDEFINED;
   3613             if (UNLIKELY( MC_(clo_mc_level) == 3 )) {
   3614                set_aligned_word64_Origin_to_undef( base + 8 * 0, otag );
   3615                set_aligned_word64_Origin_to_undef( base + 8 * 1, otag );
   3616                set_aligned_word64_Origin_to_undef( base + 8 * 2, otag );
   3617                set_aligned_word64_Origin_to_undef( base + 8 * 3, otag );
   3618                set_aligned_word64_Origin_to_undef( base + 8 * 4, otag );
   3619                set_aligned_word64_Origin_to_undef( base + 8 * 5, otag );
   3620                set_aligned_word64_Origin_to_undef( base + 8 * 6, otag );
   3621                set_aligned_word64_Origin_to_undef( base + 8 * 7, otag );
   3622                set_aligned_word64_Origin_to_undef( base + 8 * 8, otag );
   3623                set_aligned_word64_Origin_to_undef( base + 8 * 9, otag );
   3624                set_aligned_word64_Origin_to_undef( base + 8 * 10, otag );
   3625                set_aligned_word64_Origin_to_undef( base + 8 * 11, otag );
   3626                set_aligned_word64_Origin_to_undef( base + 8 * 12, otag );
   3627                set_aligned_word64_Origin_to_undef( base + 8 * 13, otag );
   3628                set_aligned_word64_Origin_to_undef( base + 8 * 14, otag );
   3629                set_aligned_word64_Origin_to_undef( base + 8 * 15, otag );
   3630             }
   3631             return;
   3632          }
   3633       }
   3634    }
   3635 
   3636    /* 288 bytes (36 ULongs) is the magic value for ELF ppc64. */
   3637    if (LIKELY( len == 288 && VG_IS_8_ALIGNED(base) )) {
   3638       /* Now we know the address range is suitably sized and aligned. */
   3639       UWord a_lo = (UWord)(base);
   3640       UWord a_hi = (UWord)(base + 288 - 1);
   3641       tl_assert(a_lo < a_hi);             // paranoia: detect overflow
   3642       if (a_hi <= MAX_PRIMARY_ADDRESS) {
   3643          // Now we know the entire range is within the main primary map.
   3644          SecMap* sm    = get_secmap_for_writing_low(a_lo);
   3645          SecMap* sm_hi = get_secmap_for_writing_low(a_hi);
   3646          /* Now we know that the entire address range falls within a
   3647             single secondary map, and that that secondary 'lives' in
   3648             the main primary map. */
   3649          if (LIKELY(sm == sm_hi)) {
   3650             // Finally, we know that the range is entirely within one secmap.
   3651             UWord   v_off = SM_OFF(a_lo);
   3652             UShort* p     = (UShort*)(&sm->vabits8[v_off]);
   3653             p[ 0] = VA_BITS16_UNDEFINED;
   3654             p[ 1] = VA_BITS16_UNDEFINED;
   3655             p[ 2] = VA_BITS16_UNDEFINED;
   3656             p[ 3] = VA_BITS16_UNDEFINED;
   3657             p[ 4] = VA_BITS16_UNDEFINED;
   3658             p[ 5] = VA_BITS16_UNDEFINED;
   3659             p[ 6] = VA_BITS16_UNDEFINED;
   3660             p[ 7] = VA_BITS16_UNDEFINED;
   3661             p[ 8] = VA_BITS16_UNDEFINED;
   3662             p[ 9] = VA_BITS16_UNDEFINED;
   3663             p[10] = VA_BITS16_UNDEFINED;
   3664             p[11] = VA_BITS16_UNDEFINED;
   3665             p[12] = VA_BITS16_UNDEFINED;
   3666             p[13] = VA_BITS16_UNDEFINED;
   3667             p[14] = VA_BITS16_UNDEFINED;
   3668             p[15] = VA_BITS16_UNDEFINED;
   3669             p[16] = VA_BITS16_UNDEFINED;
   3670             p[17] = VA_BITS16_UNDEFINED;
   3671             p[18] = VA_BITS16_UNDEFINED;
   3672             p[19] = VA_BITS16_UNDEFINED;
   3673             p[20] = VA_BITS16_UNDEFINED;
   3674             p[21] = VA_BITS16_UNDEFINED;
   3675             p[22] = VA_BITS16_UNDEFINED;
   3676             p[23] = VA_BITS16_UNDEFINED;
   3677             p[24] = VA_BITS16_UNDEFINED;
   3678             p[25] = VA_BITS16_UNDEFINED;
   3679             p[26] = VA_BITS16_UNDEFINED;
   3680             p[27] = VA_BITS16_UNDEFINED;
   3681             p[28] = VA_BITS16_UNDEFINED;
   3682             p[29] = VA_BITS16_UNDEFINED;
   3683             p[30] = VA_BITS16_UNDEFINED;
   3684             p[31] = VA_BITS16_UNDEFINED;
   3685             p[32] = VA_BITS16_UNDEFINED;
   3686             p[33] = VA_BITS16_UNDEFINED;
   3687             p[34] = VA_BITS16_UNDEFINED;
   3688             p[35] = VA_BITS16_UNDEFINED;
   3689             if (UNLIKELY( MC_(clo_mc_level) == 3 )) {
   3690                set_aligned_word64_Origin_to_undef( base + 8 * 0, otag );
   3691                set_aligned_word64_Origin_to_undef( base + 8 * 1, otag );
   3692                set_aligned_word64_Origin_to_undef( base + 8 * 2, otag );
   3693                set_aligned_word64_Origin_to_undef( base + 8 * 3, otag );
   3694                set_aligned_word64_Origin_to_undef( base + 8 * 4, otag );
   3695                set_aligned_word64_Origin_to_undef( base + 8 * 5, otag );
   3696                set_aligned_word64_Origin_to_undef( base + 8 * 6, otag );
   3697                set_aligned_word64_Origin_to_undef( base + 8 * 7, otag );
   3698                set_aligned_word64_Origin_to_undef( base + 8 * 8, otag );
   3699                set_aligned_word64_Origin_to_undef( base + 8 * 9, otag );
   3700                set_aligned_word64_Origin_to_undef( base + 8 * 10, otag );
   3701                set_aligned_word64_Origin_to_undef( base + 8 * 11, otag );
   3702                set_aligned_word64_Origin_to_undef( base + 8 * 12, otag );
   3703                set_aligned_word64_Origin_to_undef( base + 8 * 13, otag );
   3704                set_aligned_word64_Origin_to_undef( base + 8 * 14, otag );
   3705                set_aligned_word64_Origin_to_undef( base + 8 * 15, otag );
   3706                set_aligned_word64_Origin_to_undef( base + 8 * 16, otag );
   3707                set_aligned_word64_Origin_to_undef( base + 8 * 17, otag );
   3708                set_aligned_word64_Origin_to_undef( base + 8 * 18, otag );
   3709                set_aligned_word64_Origin_to_undef( base + 8 * 19, otag );
   3710                set_aligned_word64_Origin_to_undef( base + 8 * 20, otag );
   3711                set_aligned_word64_Origin_to_undef( base + 8 * 21, otag );
   3712                set_aligned_word64_Origin_to_undef( base + 8 * 22, otag );
   3713                set_aligned_word64_Origin_to_undef( base + 8 * 23, otag );
   3714                set_aligned_word64_Origin_to_undef( base + 8 * 24, otag );
   3715                set_aligned_word64_Origin_to_undef( base + 8 * 25, otag );
   3716                set_aligned_word64_Origin_to_undef( base + 8 * 26, otag );
   3717                set_aligned_word64_Origin_to_undef( base + 8 * 27, otag );
   3718                set_aligned_word64_Origin_to_undef( base + 8 * 28, otag );
   3719                set_aligned_word64_Origin_to_undef( base + 8 * 29, otag );
   3720                set_aligned_word64_Origin_to_undef( base + 8 * 30, otag );
   3721                set_aligned_word64_Origin_to_undef( base + 8 * 31, otag );
   3722                set_aligned_word64_Origin_to_undef( base + 8 * 32, otag );
   3723                set_aligned_word64_Origin_to_undef( base + 8 * 33, otag );
   3724                set_aligned_word64_Origin_to_undef( base + 8 * 34, otag );
   3725                set_aligned_word64_Origin_to_undef( base + 8 * 35, otag );
   3726             }
   3727             return;
   3728          }
   3729       }
   3730    }
   3731 
   3732    /* else fall into slow case */
   3733    MC_(make_mem_undefined_w_otag)(base, len, otag);
   3734 }
   3735 
   3736 
   3737 /*------------------------------------------------------------*/
   3738 /*--- Checking memory                                      ---*/
   3739 /*------------------------------------------------------------*/
   3740 
   3741 typedef
   3742    enum {
   3743       MC_Ok = 5,
   3744       MC_AddrErr = 6,
   3745       MC_ValueErr = 7
   3746    }
   3747    MC_ReadResult;
   3748 
   3749 
   3750 /* Check permissions for address range.  If inadequate permissions
   3751    exist, *bad_addr is set to the offending address, so the caller can
   3752    know what it is. */
   3753 
   3754 /* Returns True if [a .. a+len) is not addressible.  Otherwise,
   3755    returns False, and if bad_addr is non-NULL, sets *bad_addr to
   3756    indicate the lowest failing address.  Functions below are
   3757    similar. */
   3758 Bool MC_(check_mem_is_noaccess) ( Addr a, SizeT len, Addr* bad_addr )
   3759 {
   3760    SizeT i;
   3761    UWord vabits2;
   3762 
   3763    PROF_EVENT(MCPE_CHECK_MEM_IS_NOACCESS);
   3764    for (i = 0; i < len; i++) {
   3765       PROF_EVENT(MCPE_CHECK_MEM_IS_NOACCESS_LOOP);
   3766       vabits2 = get_vabits2(a);
   3767       if (VA_BITS2_NOACCESS != vabits2) {
   3768          if (bad_addr != NULL) *bad_addr = a;
   3769          return False;
   3770       }
   3771       a++;
   3772    }
   3773    return True;
   3774 }
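
/* A minimal usage sketch (illustrative only; nothing in this file calls
   it): a hypothetical helper that checks a range really has been revoked
   and complains about the first byte that is still accessible. */
#if 0
static void example_assert_revoked ( Addr start, SizeT len )
{
   Addr bad = 0;
   if (!MC_(check_mem_is_noaccess)(start, len, &bad))
      VG_(printf)("byte at %#lx is unexpectedly still accessible\n", bad);
}
#endif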
   3775 
   3776 static Bool is_mem_addressable ( Addr a, SizeT len,
   3777                                  /*OUT*/Addr* bad_addr )
   3778 {
   3779    SizeT i;
   3780    UWord vabits2;
   3781 
   3782    PROF_EVENT(MCPE_IS_MEM_ADDRESSABLE);
   3783    for (i = 0; i < len; i++) {
   3784       PROF_EVENT(MCPE_IS_MEM_ADDRESSABLE_LOOP);
   3785       vabits2 = get_vabits2(a);
   3786       if (VA_BITS2_NOACCESS == vabits2) {
   3787          if (bad_addr != NULL) *bad_addr = a;
   3788          return False;
   3789       }
   3790       a++;
   3791    }
   3792    return True;
   3793 }
   3794 
   3795 static MC_ReadResult is_mem_defined ( Addr a, SizeT len,
   3796                                       /*OUT*/Addr* bad_addr,
   3797                                       /*OUT*/UInt* otag )
   3798 {
   3799    SizeT i;
   3800    UWord vabits2;
   3801 
   3802    PROF_EVENT(MCPE_IS_MEM_DEFINED);
   3803    DEBUG("is_mem_defined\n");
   3804 
   3805    if (otag)     *otag = 0;
   3806    if (bad_addr) *bad_addr = 0;
   3807    for (i = 0; i < len; i++) {
   3808       PROF_EVENT(MCPE_IS_MEM_DEFINED_LOOP);
   3809       vabits2 = get_vabits2(a);
   3810       if (VA_BITS2_DEFINED != vabits2) {
   3811          // Error!  Nb: Report addressability errors in preference to
    3812          // definedness errors.  And don't report definedness errors unless
   3813          // --undef-value-errors=yes.
   3814          if (bad_addr) {
   3815             *bad_addr = a;
   3816          }
   3817          if (VA_BITS2_NOACCESS == vabits2) {
   3818             return MC_AddrErr;
   3819          }
   3820          if (MC_(clo_mc_level) >= 2) {
   3821             if (otag && MC_(clo_mc_level) == 3) {
   3822                *otag = MC_(helperc_b_load1)( a );
   3823             }
   3824             return MC_ValueErr;
   3825          }
   3826       }
   3827       a++;
   3828    }
   3829    return MC_Ok;
   3830 }
   3831 
   3832 
   3833 /* Like is_mem_defined but doesn't give up at the first uninitialised
   3834    byte -- the entire range is always checked.  This is important for
   3835    detecting errors in the case where a checked range strays into
   3836    invalid memory, but that fact is not detected by the ordinary
   3837    is_mem_defined(), because of an undefined section that precedes the
   3838    out of range section, possibly as a result of an alignment hole in
   3839    the checked data.  This version always checks the entire range and
    3840    can report both a definedness and an accessibility error, if
   3841    necessary. */
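/* An illustrative (hypothetical) client-side scenario where this matters:

      #include <stdlib.h>
      #include <unistd.h>

      int main ( void )
      {
         char* buf = malloc(4);   // heap block: contents start out undefined
         buf[0] = 'x';            // buf[1..3] stay undefined
         write(1, buf, 8);        // checked range runs past the block end:
         free(buf);               //    buf[4..7] are unaddressable
         return 0;
      }

   Checking the 8-byte range with is_mem_defined() gives up at buf[1] and
   yields only a definedness error; this routine scans the whole range and
   can additionally report the addressability error at buf+4. */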
   3842 static void is_mem_defined_comprehensive (
   3843                Addr a, SizeT len,
   3844                /*OUT*/Bool* errorV,    /* is there a definedness err? */
   3845                /*OUT*/Addr* bad_addrV, /* if so where? */
   3846                /*OUT*/UInt* otagV,     /* and what's its otag? */
   3847                /*OUT*/Bool* errorA,    /* is there an addressability err? */
   3848                /*OUT*/Addr* bad_addrA  /* if so where? */
   3849             )
   3850 {
   3851    SizeT i;
   3852    UWord vabits2;
   3853    Bool  already_saw_errV = False;
   3854 
   3855    PROF_EVENT(MCPE_IS_MEM_DEFINED_COMPREHENSIVE);
   3856    DEBUG("is_mem_defined_comprehensive\n");
   3857 
   3858    tl_assert(!(*errorV || *errorA));
   3859 
   3860    for (i = 0; i < len; i++) {
   3861       PROF_EVENT(MCPE_IS_MEM_DEFINED_COMPREHENSIVE_LOOP);
   3862       vabits2 = get_vabits2(a);
   3863       switch (vabits2) {
   3864          case VA_BITS2_DEFINED:
   3865             a++;
   3866             break;
   3867          case VA_BITS2_UNDEFINED:
   3868          case VA_BITS2_PARTDEFINED:
   3869             if (!already_saw_errV) {
   3870                *errorV    = True;
   3871                *bad_addrV = a;
   3872                if (MC_(clo_mc_level) == 3) {
   3873                   *otagV = MC_(helperc_b_load1)( a );
   3874                } else {
   3875                   *otagV = 0;
   3876                }
   3877                already_saw_errV = True;
   3878             }
   3879             a++; /* keep going */
   3880             break;
   3881          case VA_BITS2_NOACCESS:
   3882             *errorA    = True;
   3883             *bad_addrA = a;
   3884             return; /* give up now. */
   3885          default:
   3886             tl_assert(0);
   3887       }
   3888    }
   3889 }
   3890 
   3891 
   3892 /* Check a zero-terminated ascii string.  Tricky -- don't want to
   3893    examine the actual bytes, to find the end, until we're sure it is
   3894    safe to do so. */
   3895 
    3896 static MC_ReadResult mc_is_defined_asciiz ( Addr a, Addr* bad_addr, UInt* otag )
   3897 {
   3898    UWord vabits2;
   3899 
   3900    PROF_EVENT(MCPE_IS_DEFINED_ASCIIZ);
   3901    DEBUG("mc_is_defined_asciiz\n");
   3902 
   3903    if (otag)     *otag = 0;
   3904    if (bad_addr) *bad_addr = 0;
   3905    while (True) {
   3906       PROF_EVENT(MCPE_IS_DEFINED_ASCIIZ_LOOP);
   3907       vabits2 = get_vabits2(a);
   3908       if (VA_BITS2_DEFINED != vabits2) {
   3909          // Error!  Nb: Report addressability errors in preference to
    3910          // definedness errors.  And don't report definedness errors unless
   3911          // --undef-value-errors=yes.
   3912          if (bad_addr) {
   3913             *bad_addr = a;
   3914          }
   3915          if (VA_BITS2_NOACCESS == vabits2) {
   3916             return MC_AddrErr;
   3917          }
   3918          if (MC_(clo_mc_level) >= 2) {
   3919             if (otag && MC_(clo_mc_level) == 3) {
   3920                *otag = MC_(helperc_b_load1)( a );
   3921             }
   3922             return MC_ValueErr;
   3923          }
   3924       }
   3925       /* Ok, a is safe to read. */
   3926       if (* ((UChar*)a) == 0) {
   3927          return MC_Ok;
   3928       }
   3929       a++;
   3930    }
   3931 }
   3932 
   3933 
   3934 /*------------------------------------------------------------*/
   3935 /*--- Memory event handlers                                ---*/
   3936 /*------------------------------------------------------------*/
   3937 
   3938 static
   3939 void check_mem_is_addressable ( CorePart part, ThreadId tid, const HChar* s,
   3940                                 Addr base, SizeT size )
   3941 {
   3942    Addr bad_addr;
   3943    Bool ok = is_mem_addressable ( base, size, &bad_addr );
   3944 
   3945    if (!ok) {
   3946       switch (part) {
   3947       case Vg_CoreSysCall:
   3948          MC_(record_memparam_error) ( tid, bad_addr,
   3949                                       /*isAddrErr*/True, s, 0/*otag*/ );
   3950          break;
   3951 
   3952       case Vg_CoreSignal:
   3953          MC_(record_core_mem_error)( tid, s );
   3954          break;
   3955 
   3956       default:
   3957          VG_(tool_panic)("check_mem_is_addressable: unexpected CorePart");
   3958       }
   3959    }
   3960 }
   3961 
   3962 static
   3963 void check_mem_is_defined ( CorePart part, ThreadId tid, const HChar* s,
   3964                             Addr base, SizeT size )
   3965 {
   3966    UInt otag = 0;
   3967    Addr bad_addr;
   3968    MC_ReadResult res = is_mem_defined ( base, size, &bad_addr, &otag );
   3969 
   3970    if (MC_Ok != res) {
   3971       Bool isAddrErr = ( MC_AddrErr == res ? True : False );
   3972 
   3973       switch (part) {
   3974       case Vg_CoreSysCall:
   3975          MC_(record_memparam_error) ( tid, bad_addr, isAddrErr, s,
   3976                                       isAddrErr ? 0 : otag );
   3977          break;
   3978 
   3979       case Vg_CoreSysCallArgInMem:
   3980          MC_(record_regparam_error) ( tid, s, otag );
   3981          break;
   3982 
   3983       /* If we're being asked to jump to a silly address, record an error
   3984          message before potentially crashing the entire system. */
   3985       case Vg_CoreTranslate:
   3986          MC_(record_jump_error)( tid, bad_addr );
   3987          break;
   3988 
   3989       default:
   3990          VG_(tool_panic)("check_mem_is_defined: unexpected CorePart");
   3991       }
   3992    }
   3993 }
   3994 
   3995 static
   3996 void check_mem_is_defined_asciiz ( CorePart part, ThreadId tid,
   3997                                    const HChar* s, Addr str )
   3998 {
   3999    MC_ReadResult res;
   4000    Addr bad_addr = 0;   // shut GCC up
   4001    UInt otag = 0;
   4002 
   4003    tl_assert(part == Vg_CoreSysCall);
   4004    res = mc_is_defined_asciiz ( (Addr)str, &bad_addr, &otag );
   4005    if (MC_Ok != res) {
   4006       Bool isAddrErr = ( MC_AddrErr == res ? True : False );
   4007       MC_(record_memparam_error) ( tid, bad_addr, isAddrErr, s,
   4008                                    isAddrErr ? 0 : otag );
   4009    }
   4010 }
   4011 
   4012 /* Handling of mmap and mprotect is not as simple as it seems.
   4013 
   4014    The underlying semantics are that memory obtained from mmap is
   4015    always initialised, but may be inaccessible.  And changes to the
    4016    protection of memory do not change its contents, and hence do not
    4017    change its definedness state.  The problem is that we can't model an
    4018    inaccessible-but-with-some-definedness state; once we mark memory as
    4019    inaccessible we lose all information about definedness, and so can't
    4020    restore it if the memory is later made accessible again.
   4021 
   4022    One obvious thing to do is this:
   4023 
   4024       mmap/mprotect NONE  -> noaccess
   4025       mmap/mprotect other -> defined
   4026 
   4027    The problem case here is: taking accessible memory, writing
   4028    uninitialised data to it, mprotecting it NONE and later mprotecting
   4029    it back to some accessible state causes the undefinedness to be
   4030    lost.
   4031 
   4032    A better proposal is:
   4033 
   4034      (1) mmap NONE       ->  make noaccess
   4035      (2) mmap other      ->  make defined
   4036 
   4037      (3) mprotect NONE   ->  # no change
   4038      (4) mprotect other  ->  change any "noaccess" to "defined"
   4039 
   4040    (2) is OK because memory newly obtained from mmap really is defined
   4041        (zeroed out by the kernel -- doing anything else would
   4042        constitute a massive security hole.)
   4043 
   4044    (1) is OK because the only way to make the memory usable is via
   4045        (4), in which case we also wind up correctly marking it all as
   4046        defined.
   4047 
    4048    (3) is the weak case.  We choose not to change the memory state
    4049        (presumably the range is in some mixture of "defined" and
    4050        "undefined", viz, accessible but with arbitrary V bits).  Doing
   4051        nothing means we retain the V bits, so that if the memory is
   4052        later mprotected "other", the V bits remain unchanged, so there
   4053        can be no false negatives.  The bad effect is that if there's
   4054        an access in the area, then MC cannot warn; but at least we'll
   4055        get a SEGV to show, so it's better than nothing.
   4056 
   4057    Consider the sequence (3) followed by (4).  Any memory that was
   4058    "defined" or "undefined" previously retains its state (as
   4059    required).  Any memory that was "noaccess" before can only have
   4060    been made that way by (1), and so it's OK to change it to
   4061    "defined".
   4062 
   4063    See https://bugs.kde.org/show_bug.cgi?id=205541
   4064    and https://bugs.kde.org/show_bug.cgi?id=210268
   4065 */
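/* As a concrete (hypothetical) example of why (3)+(4) is preferable to the
   simpler scheme, consider this client sequence:

      char* p = mmap(0, 4096, PROT_READ|PROT_WRITE,
                     MAP_PRIVATE|MAP_ANONYMOUS, -1, 0);   // (2): defined
      char junk;                       // never initialised
      p[0] = junk;                     // p[0] becomes undefined
      mprotect(p, 4096, PROT_NONE);    // (3): V bits left untouched
      mprotect(p, 4096, PROT_READ);    // (4): only "noaccess" -> "defined"
      if (p[0] == 'x') { ... }         // still reported as a use of
                                       // uninitialised data

   Under the "mprotect NONE -> noaccess" scheme the undefinedness of p[0]
   would have been lost at the first mprotect, and the final branch would
   go unreported. */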
   4066 static
   4067 void mc_new_mem_mmap ( Addr a, SizeT len, Bool rr, Bool ww, Bool xx,
   4068                        ULong di_handle )
   4069 {
   4070    if (rr || ww || xx) {
   4071       /* (2) mmap/mprotect other -> defined */
   4072       MC_(make_mem_defined)(a, len);
   4073    } else {
   4074       /* (1) mmap/mprotect NONE  -> noaccess */
   4075       MC_(make_mem_noaccess)(a, len);
   4076    }
   4077 }
   4078 
   4079 static
   4080 void mc_new_mem_mprotect ( Addr a, SizeT len, Bool rr, Bool ww, Bool xx )
   4081 {
   4082    if (rr || ww || xx) {
   4083       /* (4) mprotect other  ->  change any "noaccess" to "defined" */
   4084       make_mem_defined_if_noaccess(a, len);
   4085    } else {
   4086       /* (3) mprotect NONE   ->  # no change */
   4087       /* do nothing */
   4088    }
   4089 }
   4090 
   4091 
   4092 static
   4093 void mc_new_mem_startup( Addr a, SizeT len,
   4094                          Bool rr, Bool ww, Bool xx, ULong di_handle )
   4095 {
    4096    // Code is defined; initialised variables get put in the data
    4097    // segment and are defined; and uninitialised variables get put in the
    4098    // bss segment and are auto-zeroed (and so defined).
   4099    //
   4100    // It's possible that there will be padding between global variables.
   4101    // This will also be auto-zeroed, and marked as defined by Memcheck.  If
   4102    // a program uses it, Memcheck will not complain.  This is arguably a
   4103    // false negative, but it's a grey area -- the behaviour is defined (the
   4104    // padding is zeroed) but it's probably not what the user intended.  And
   4105    // we can't avoid it.
   4106    //
   4107    // Note: we generally ignore RWX permissions, because we can't track them
   4108    // without requiring more than one A bit which would slow things down a
   4109    // lot.  But on Darwin the 0th page is mapped but !R and !W and !X.
   4110    // So we mark any such pages as "unaddressable".
   4111    DEBUG("mc_new_mem_startup(%#lx, %llu, rr=%u, ww=%u, xx=%u)\n",
   4112          a, (ULong)len, rr, ww, xx);
   4113    mc_new_mem_mmap(a, len, rr, ww, xx, di_handle);
   4114 }
   4115 
   4116 static
   4117 void mc_post_mem_write(CorePart part, ThreadId tid, Addr a, SizeT len)
   4118 {
   4119    MC_(make_mem_defined)(a, len);
   4120 }
   4121 
   4122 
   4123 /*------------------------------------------------------------*/
   4124 /*--- Register event handlers                              ---*/
   4125 /*------------------------------------------------------------*/
   4126 
   4127 /* Try and get a nonzero origin for the guest state section of thread
   4128    tid characterised by (offset,size).  Return 0 if nothing to show
   4129    for it. */
   4130 static UInt mb_get_origin_for_guest_offset ( ThreadId tid,
   4131                                              Int offset, SizeT size )
   4132 {
   4133    Int   sh2off;
   4134    UInt  area[3];
   4135    UInt  otag;
   4136    sh2off = MC_(get_otrack_shadow_offset)( offset, size );
   4137    if (sh2off == -1)
   4138       return 0;  /* This piece of guest state is not tracked */
   4139    tl_assert(sh2off >= 0);
   4140    tl_assert(0 == (sh2off % 4));
   4141    area[0] = 0x31313131;
   4142    area[2] = 0x27272727;
   4143    VG_(get_shadow_regs_area)( tid, (UChar *)&area[1], 2/*shadowno*/,sh2off,4 );
   4144    tl_assert(area[0] == 0x31313131);
   4145    tl_assert(area[2] == 0x27272727);
   4146    otag = area[1];
   4147    return otag;
   4148 }
   4149 
   4150 
   4151 /* When some chunk of guest state is written, mark the corresponding
   4152    shadow area as valid.  This is used to initialise arbitrarily large
   4153    chunks of guest state, hence the _SIZE value, which has to be as
   4154    big as the biggest guest state.
   4155 */
   4156 static void mc_post_reg_write ( CorePart part, ThreadId tid,
   4157                                 PtrdiffT offset, SizeT size)
   4158 {
   4159 #  define MAX_REG_WRITE_SIZE 1712
   4160    UChar area[MAX_REG_WRITE_SIZE];
   4161    tl_assert(size <= MAX_REG_WRITE_SIZE);
   4162    VG_(memset)(area, V_BITS8_DEFINED, size);
   4163    VG_(set_shadow_regs_area)( tid, 1/*shadowNo*/,offset,size, area );
   4164 #  undef MAX_REG_WRITE_SIZE
   4165 }
   4166 
   4167 static
   4168 void mc_post_reg_write_clientcall ( ThreadId tid,
   4169                                     PtrdiffT offset, SizeT size, Addr f)
   4170 {
   4171    mc_post_reg_write(/*dummy*/0, tid, offset, size);
   4172 }
   4173 
   4174 /* Look at the definedness of the guest's shadow state for
   4175    [offset, offset+len).  If any part of that is undefined, record
   4176    a parameter error.
   4177 */
   4178 static void mc_pre_reg_read ( CorePart part, ThreadId tid, const HChar* s,
   4179                               PtrdiffT offset, SizeT size)
   4180 {
   4181    Int   i;
   4182    Bool  bad;
   4183    UInt  otag;
   4184 
   4185    UChar area[16];
   4186    tl_assert(size <= 16);
   4187 
   4188    VG_(get_shadow_regs_area)( tid, area, 1/*shadowNo*/,offset,size );
   4189 
   4190    bad = False;
   4191    for (i = 0; i < size; i++) {
   4192       if (area[i] != V_BITS8_DEFINED) {
   4193          bad = True;
   4194          break;
   4195       }
   4196    }
   4197 
   4198    if (!bad)
   4199       return;
   4200 
   4201    /* We've found some undefinedness.  See if we can also find an
   4202       origin for it. */
   4203    otag = mb_get_origin_for_guest_offset( tid, offset, size );
   4204    MC_(record_regparam_error) ( tid, s, otag );
   4205 }
   4206 
   4207 
   4208 /*------------------------------------------------------------*/
   4209 /*--- Register-memory event handlers                       ---*/
   4210 /*------------------------------------------------------------*/
   4211 
   4212 static void mc_copy_mem_to_reg ( CorePart part, ThreadId tid, Addr a,
   4213                                  PtrdiffT guest_state_offset, SizeT size )
   4214 {
   4215    SizeT i;
   4216    UChar vbits8;
   4217    Int offset;
   4218    UInt d32;
   4219 
   4220    /* Slow loop. */
   4221    for (i = 0; i < size; i++) {
   4222       get_vbits8( a+i, &vbits8 );
   4223       VG_(set_shadow_regs_area)( tid, 1/*shadowNo*/, guest_state_offset+i,
   4224                                  1, &vbits8 );
   4225    }
   4226 
   4227    if (MC_(clo_mc_level) != 3)
   4228       return;
   4229 
   4230    /* Track origins. */
   4231    offset = MC_(get_otrack_shadow_offset)( guest_state_offset, size );
   4232    if (offset == -1)
   4233       return;
   4234 
   4235    switch (size) {
   4236    case 1:
   4237       d32 = MC_(helperc_b_load1)( a );
   4238       break;
   4239    case 2:
   4240       d32 = MC_(helperc_b_load2)( a );
   4241       break;
   4242    case 4:
   4243       d32 = MC_(helperc_b_load4)( a );
   4244       break;
   4245    case 8:
   4246       d32 = MC_(helperc_b_load8)( a );
   4247       break;
   4248    case 16:
   4249       d32 = MC_(helperc_b_load16)( a );
   4250       break;
   4251    case 32:
   4252       d32 = MC_(helperc_b_load32)( a );
   4253       break;
   4254    default:
   4255       tl_assert(0);
   4256    }
   4257 
   4258    VG_(set_shadow_regs_area)( tid, 2/*shadowNo*/, offset, 4, (UChar*)&d32 );
   4259 }
   4260 
   4261 static void mc_copy_reg_to_mem ( CorePart part, ThreadId tid,
   4262                                  PtrdiffT guest_state_offset, Addr a,
   4263                                  SizeT size )
   4264 {
   4265    SizeT i;
   4266    UChar vbits8;
   4267    Int offset;
   4268    UInt d32;
   4269 
   4270    /* Slow loop. */
   4271    for (i = 0; i < size; i++) {
   4272       VG_(get_shadow_regs_area)( tid, &vbits8, 1/*shadowNo*/,
   4273                                  guest_state_offset+i, 1 );
   4274       set_vbits8( a+i, vbits8 );
   4275    }
   4276 
   4277    if (MC_(clo_mc_level) != 3)
   4278       return;
   4279 
   4280    /* Track origins. */
   4281    offset = MC_(get_otrack_shadow_offset)( guest_state_offset, size );
   4282    if (offset == -1)
   4283       return;
   4284 
   4285    VG_(get_shadow_regs_area)( tid, (UChar*)&d32, 2/*shadowNo*/, offset, 4 );
   4286    switch (size) {
   4287    case 1:
   4288       MC_(helperc_b_store1)( a, d32 );
   4289       break;
   4290    case 2:
   4291       MC_(helperc_b_store2)( a, d32 );
   4292       break;
   4293    case 4:
   4294       MC_(helperc_b_store4)( a, d32 );
   4295       break;
   4296    case 8:
   4297       MC_(helperc_b_store8)( a, d32 );
   4298       break;
   4299    case 16:
   4300       MC_(helperc_b_store16)( a, d32 );
   4301       break;
   4302    case 32:
   4303       MC_(helperc_b_store32)( a, d32 );
   4304       break;
   4305    default:
   4306       tl_assert(0);
   4307    }
   4308 }
   4309 
   4310 
   4311 /*------------------------------------------------------------*/
   4312 /*--- Some static assertions                               ---*/
   4313 /*------------------------------------------------------------*/
   4314 
   4315 /* The handwritten assembly helpers below have baked-in assumptions
   4316    about various constant values.  These assertions attempt to make
   4317    that a bit safer by checking those values and flagging changes that
   4318    would make the assembly invalid.  Not perfect but it's better than
   4319    nothing. */
   4320 
   4321 STATIC_ASSERT(SM_CHUNKS * 4 == 65536);
   4322 
   4323 STATIC_ASSERT(VA_BITS8_DEFINED   == 0xAA);
   4324 STATIC_ASSERT(VA_BITS8_UNDEFINED == 0x55);
   4325 
   4326 STATIC_ASSERT(V_BITS32_DEFINED   == 0x00000000);
   4327 STATIC_ASSERT(V_BITS32_UNDEFINED == 0xFFFFFFFF);
   4328 
   4329 STATIC_ASSERT(VA_BITS4_DEFINED == 0xA);
   4330 STATIC_ASSERT(VA_BITS4_UNDEFINED == 0x5);
   4331 
   4332 STATIC_ASSERT(V_BITS16_DEFINED == 0x0000);
   4333 STATIC_ASSERT(V_BITS16_UNDEFINED == 0xFFFF);
   4334 
   4335 STATIC_ASSERT(VA_BITS2_DEFINED == 2);
   4336 STATIC_ASSERT(VA_BITS2_UNDEFINED == 1);
   4337 
   4338 STATIC_ASSERT(V_BITS8_DEFINED == 0x00);
   4339 STATIC_ASSERT(V_BITS8_UNDEFINED == 0xFF);
   4340 
   4341 
   4342 /*------------------------------------------------------------*/
   4343 /*--- Functions called directly from generated code:       ---*/
   4344 /*--- Load/store handlers.                                 ---*/
   4345 /*------------------------------------------------------------*/
   4346 
   4347 /* Types:  LOADV32, LOADV16, LOADV8 are:
   4348                UWord fn ( Addr a )
   4349    so they return 32-bits on 32-bit machines and 64-bits on
   4350    64-bit machines.  Addr has the same size as a host word.
   4351 
   4352    LOADV64 is always  ULong fn ( Addr a )
   4353 
   4354    Similarly for STOREV8, STOREV16, STOREV32, the supplied vbits
   4355    are a UWord, and for STOREV64 they are a ULong.
   4356 */
   4357 
   4358 /* If any part of '_a' indicated by the mask is 1, either '_a' is not
   4359    naturally '_sz/8'-aligned, or it exceeds the range covered by the
   4360    primary map.  This is all very tricky (and important!), so let's
   4361    work through the maths by hand (below), *and* assert for these
   4362    values at startup. */
   4363 #define MASK(_szInBytes) \
   4364    ( ~((0x10000UL-(_szInBytes)) | ((N_PRIMARY_MAP-1) << 16)) )
   4365 
   4366 /* MASK only exists so as to define this macro. */
   4367 #define UNALIGNED_OR_HIGH(_a,_szInBits) \
   4368    ((_a) & MASK((_szInBits>>3)))
   4369 
   4370 /* On a 32-bit machine:
   4371 
   4372    N_PRIMARY_BITS          == 16, so
   4373    N_PRIMARY_MAP           == 0x10000, so
   4374    N_PRIMARY_MAP-1         == 0xFFFF, so
   4375    (N_PRIMARY_MAP-1) << 16 == 0xFFFF0000, and so
   4376 
   4377    MASK(1) = ~ ( (0x10000 - 1) | 0xFFFF0000 )
   4378            = ~ ( 0xFFFF | 0xFFFF0000 )
   4379            = ~ 0xFFFF'FFFF
   4380            = 0
   4381 
   4382    MASK(2) = ~ ( (0x10000 - 2) | 0xFFFF0000 )
   4383            = ~ ( 0xFFFE | 0xFFFF0000 )
   4384            = ~ 0xFFFF'FFFE
   4385            = 1
   4386 
   4387    MASK(4) = ~ ( (0x10000 - 4) | 0xFFFF0000 )
   4388            = ~ ( 0xFFFC | 0xFFFF0000 )
   4389            = ~ 0xFFFF'FFFC
   4390            = 3
   4391 
   4392    MASK(8) = ~ ( (0x10000 - 8) | 0xFFFF0000 )
   4393            = ~ ( 0xFFF8 | 0xFFFF0000 )
   4394            = ~ 0xFFFF'FFF8
   4395            = 7
   4396 
   4397    Hence in the 32-bit case, "a & MASK(1/2/4/8)" is a nonzero value
   4398    precisely when a is not 1/2/4/8-bytes aligned.  And obviously, for
   4399    the 1-byte alignment case, it is always a zero value, since MASK(1)
   4400    is zero.  All as expected.
   4401 
   4402    On a 64-bit machine, it's more complex, since we're testing
   4403    simultaneously for misalignment and for the address being at or
   4404    above 64G:
   4405 
   4406    N_PRIMARY_BITS          == 20, so
   4407    N_PRIMARY_MAP           == 0x100000, so
   4408    N_PRIMARY_MAP-1         == 0xFFFFF, so
   4409    (N_PRIMARY_MAP-1) << 16 == 0xF'FFFF'0000, and so
   4410 
   4411    MASK(1) = ~ ( (0x10000 - 1) | 0xF'FFFF'0000 )
   4412            = ~ ( 0xFFFF | 0xF'FFFF'0000 )
   4413            = ~ 0xF'FFFF'FFFF
   4414            = 0xFFFF'FFF0'0000'0000
   4415 
   4416    MASK(2) = ~ ( (0x10000 - 2) | 0xF'FFFF'0000 )
   4417            = ~ ( 0xFFFE | 0xF'FFFF'0000 )
   4418            = ~ 0xF'FFFF'FFFE
   4419            = 0xFFFF'FFF0'0000'0001
   4420 
   4421    MASK(4) = ~ ( (0x10000 - 4) | 0xF'FFFF'0000 )
   4422            = ~ ( 0xFFFC | 0xF'FFFF'0000 )
   4423            = ~ 0xF'FFFF'FFFC
   4424            = 0xFFFF'FFF0'0000'0003
   4425 
   4426    MASK(8) = ~ ( (0x10000 - 8) | 0xF'FFFF'0000 )
   4427            = ~ ( 0xFFF8 | 0xF'FFFF'0000 )
   4428            = ~ 0xF'FFFF'FFF8
   4429            = 0xFFFF'FFF0'0000'0007
   4430 */
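/* A minimal standalone sketch (not part of the tool) that recomputes the
   64-bit values above, assuming N_PRIMARY_BITS == 20 as in the derivation;
   the printed constants should match the hand calculation.

      #include <stdio.h>

      int main ( void )
      {
         unsigned long n_primary_map = 1UL << 20;
         for (int sz = 1; sz <= 8; sz *= 2) {
            unsigned long mask
               = ~((0x10000UL - sz) | ((n_primary_map - 1) << 16));
            printf("MASK(%d) = 0x%016lx\n", sz, mask);
         }
         return 0;
      }
*/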
   4431 
   4432 /*------------------------------------------------------------*/
   4433 /*--- LOADV256 and LOADV128                                ---*/
   4434 /*------------------------------------------------------------*/
   4435 
   4436 static INLINE
   4437 void mc_LOADV_128_or_256 ( /*OUT*/ULong* res,
   4438                            Addr a, SizeT nBits, Bool isBigEndian )
   4439 {
   4440    PROF_EVENT(MCPE_LOADV_128_OR_256);
   4441 
   4442 #ifndef PERF_FAST_LOADV
   4443    mc_LOADV_128_or_256_slow( res, a, nBits, isBigEndian );
   4444    return;
   4445 #else
   4446    {
   4447       UWord   sm_off16, vabits16, j;
   4448       UWord   nBytes  = nBits / 8;
   4449       UWord   nULongs = nBytes / 8;
   4450       SecMap* sm;
   4451 
   4452       if (UNLIKELY( UNALIGNED_OR_HIGH(a,nBits) )) {
   4453          PROF_EVENT(MCPE_LOADV_128_OR_256_SLOW1);
   4454          mc_LOADV_128_or_256_slow( res, a, nBits, isBigEndian );
   4455          return;
   4456       }
   4457 
   4458       /* Handle common cases quickly: a (and a+8 and a+16 etc.) is
   4459          suitably aligned, is mapped, and addressible. */
   4460       for (j = 0; j < nULongs; j++) {
   4461          sm       = get_secmap_for_reading_low(a + 8*j);
   4462          sm_off16 = SM_OFF_16(a + 8*j);
   4463          vabits16 = ((UShort*)(sm->vabits8))[sm_off16];
   4464 
   4465          // Convert V bits from compact memory form to expanded
   4466          // register form.
   4467          if (LIKELY(vabits16 == VA_BITS16_DEFINED)) {
   4468             res[j] = V_BITS64_DEFINED;
   4469          } else if (LIKELY(vabits16 == VA_BITS16_UNDEFINED)) {
   4470             res[j] = V_BITS64_UNDEFINED;
   4471          } else {
   4472             /* Slow case: some block of 8 bytes are not all-defined or
   4473                all-undefined. */
   4474             PROF_EVENT(MCPE_LOADV_128_OR_256_SLOW2);
   4475             mc_LOADV_128_or_256_slow( res, a, nBits, isBigEndian );
   4476             return;
   4477          }
   4478       }
   4479       return;
   4480    }
   4481 #endif
   4482 }
   4483 
   4484 VG_REGPARM(2) void MC_(helperc_LOADV256be) ( /*OUT*/V256* res, Addr a )
   4485 {
   4486    mc_LOADV_128_or_256(&res->w64[0], a, 256, True);
   4487 }
   4488 VG_REGPARM(2) void MC_(helperc_LOADV256le) ( /*OUT*/V256* res, Addr a )
   4489 {
   4490    mc_LOADV_128_or_256(&res->w64[0], a, 256, False);
   4491 }
   4492 
   4493 VG_REGPARM(2) void MC_(helperc_LOADV128be) ( /*OUT*/V128* res, Addr a )
   4494 {
   4495    mc_LOADV_128_or_256(&res->w64[0], a, 128, True);
   4496 }
   4497 VG_REGPARM(2) void MC_(helperc_LOADV128le) ( /*OUT*/V128* res, Addr a )
   4498 {
   4499    mc_LOADV_128_or_256(&res->w64[0], a, 128, False);
   4500 }
   4501 
   4502 /*------------------------------------------------------------*/
   4503 /*--- LOADV64                                              ---*/
   4504 /*------------------------------------------------------------*/
   4505 
   4506 static INLINE
   4507 ULong mc_LOADV64 ( Addr a, Bool isBigEndian )
   4508 {
   4509    PROF_EVENT(MCPE_LOADV64);
   4510 
   4511 #ifndef PERF_FAST_LOADV
   4512    return mc_LOADVn_slow( a, 64, isBigEndian );
   4513 #else
   4514    {
   4515       UWord   sm_off16, vabits16;
   4516       SecMap* sm;
   4517 
   4518       if (UNLIKELY( UNALIGNED_OR_HIGH(a,64) )) {
   4519          PROF_EVENT(MCPE_LOADV64_SLOW1);
   4520          return (ULong)mc_LOADVn_slow( a, 64, isBigEndian );
   4521       }
   4522 
   4523       sm       = get_secmap_for_reading_low(a);
   4524       sm_off16 = SM_OFF_16(a);
   4525       vabits16 = ((UShort*)(sm->vabits8))[sm_off16];
   4526 
   4527       // Handle common case quickly: a is suitably aligned, is mapped, and
   4528       // addressible.
   4529       // Convert V bits from compact memory form to expanded register form.
   4530       if (LIKELY(vabits16 == VA_BITS16_DEFINED)) {
   4531          return V_BITS64_DEFINED;
   4532       } else if (LIKELY(vabits16 == VA_BITS16_UNDEFINED)) {
   4533          return V_BITS64_UNDEFINED;
   4534       } else {
   4535          /* Slow case: the 8 bytes are not all-defined or all-undefined. */
   4536          PROF_EVENT(MCPE_LOADV64_SLOW2);
   4537          return mc_LOADVn_slow( a, 64, isBigEndian );
   4538       }
   4539    }
   4540 #endif
   4541 }
   4542 
   4543 // Generic for all platforms
   4544 VG_REGPARM(1) ULong MC_(helperc_LOADV64be) ( Addr a )
   4545 {
   4546    return mc_LOADV64(a, True);
   4547 }
   4548 
   4549 // Non-generic assembly for arm32-linux
   4550 #if ENABLE_ASSEMBLY_HELPERS && defined(PERF_FAST_LOADV) \
   4551     && defined(VGP_arm_linux)
   4552 __asm__( /* Derived from the 32 bit assembly helper */
   4553 ".text                                  \n"
   4554 ".align 2                               \n"
   4555 ".global vgMemCheck_helperc_LOADV64le   \n"
   4556 ".type   vgMemCheck_helperc_LOADV64le, %function \n"
   4557 "vgMemCheck_helperc_LOADV64le:          \n"
   4558 "      tst    r0, #7                    \n"
   4559 "      movw   r3, #:lower16:primary_map \n"
   4560 "      bne    .LLV64LEc4                \n" // if misaligned
   4561 "      lsr    r2, r0, #16               \n"
   4562 "      movt   r3, #:upper16:primary_map \n"
   4563 "      ldr    r2, [r3, r2, lsl #2]      \n"
   4564 "      uxth   r1, r0                    \n" // r1 is 0-(16)-0 X-(13)-X 000
   4565 "      movw   r3, #0xAAAA               \n"
   4566 "      lsr    r1, r1, #2                \n" // r1 is 0-(16)-0 00 X-(13)-X 0
   4567 "      ldrh   r1, [r2, r1]              \n"
   4568 "      cmp    r1, r3                    \n" // 0xAAAA == VA_BITS16_DEFINED
   4569 "      bne    .LLV64LEc0                \n" // if !all_defined
   4570 "      mov    r1, #0x0                  \n" // 0x0 == V_BITS32_DEFINED
   4571 "      mov    r0, #0x0                  \n" // 0x0 == V_BITS32_DEFINED
   4572 "      bx     lr                        \n"
   4573 ".LLV64LEc0:                            \n"
   4574 "      movw   r3, #0x5555               \n"
   4575 "      cmp    r1, r3                    \n" // 0x5555 == VA_BITS16_UNDEFINED
   4576 "      bne    .LLV64LEc4                \n" // if !all_undefined
   4577 "      mov    r1, #0xFFFFFFFF           \n" // 0xFFFFFFFF == V_BITS32_UNDEFINED
   4578 "      mov    r0, #0xFFFFFFFF           \n" // 0xFFFFFFFF == V_BITS32_UNDEFINED
   4579 "      bx     lr                        \n"
   4580 ".LLV64LEc4:                            \n"
   4581 "      push   {r4, lr}                  \n"
   4582 "      mov    r2, #0                    \n"
   4583 "      mov    r1, #64                   \n"
   4584 "      bl     mc_LOADVn_slow            \n"
   4585 "      pop    {r4, pc}                  \n"
   4586 ".size vgMemCheck_helperc_LOADV64le, .-vgMemCheck_helperc_LOADV64le \n"
   4587 ".previous\n"
   4588 );
   4589 
   4590 #elif ENABLE_ASSEMBLY_HELPERS && defined(PERF_FAST_LOADV) \
   4591       && (defined(VGP_x86_linux) || defined(VGP_x86_solaris))
   4592 __asm__(
   4593 ".text\n"
   4594 ".align 16\n"
   4595 ".global vgMemCheck_helperc_LOADV64le\n"
   4596 ".type   vgMemCheck_helperc_LOADV64le, @function\n"
   4597 "vgMemCheck_helperc_LOADV64le:\n"
   4598 "      test   $0x7,  %eax\n"
   4599 "      jne    .LLV64LE2\n"          /* jump if not aligned */
   4600 "      mov    %eax,  %ecx\n"
   4601 "      movzwl %ax,   %edx\n"
   4602 "      shr    $0x10, %ecx\n"
   4603 "      mov    primary_map(,%ecx,4), %ecx\n"
   4604 "      shr    $0x3,  %edx\n"
   4605 "      movzwl (%ecx,%edx,2), %edx\n"
   4606 "      cmp    $0xaaaa, %edx\n"
   4607 "      jne    .LLV64LE1\n"          /* jump if not all defined */
   4608 "      xor    %eax, %eax\n"         /* return 0 in edx:eax */
   4609 "      xor    %edx, %edx\n"
   4610 "      ret\n"
   4611 ".LLV64LE1:\n"
   4612 "      cmp    $0x5555, %edx\n"
   4613 "      jne    .LLV64LE2\n"         /* jump if not all undefined */
   4614 "      or     $0xffffffff, %eax\n" /* else return all bits set in edx:eax */
   4615 "      or     $0xffffffff, %edx\n"
   4616 "      ret\n"
   4617 ".LLV64LE2:\n"
   4618 "      xor    %ecx,  %ecx\n"  /* tail call to mc_LOADVn_slow(a, 64, 0) */
   4619 "      mov    $64,   %edx\n"
   4620 "      jmp    mc_LOADVn_slow\n"
   4621 ".size vgMemCheck_helperc_LOADV64le, .-vgMemCheck_helperc_LOADV64le\n"
   4622 ".previous\n"
   4623 );
   4624 
   4625 #else
   4626 // Generic for all platforms except {arm32,x86}-linux and x86-solaris
   4627 VG_REGPARM(1) ULong MC_(helperc_LOADV64le) ( Addr a )
   4628 {
   4629    return mc_LOADV64(a, False);
   4630 }
   4631 #endif
   4632 
   4633 /*------------------------------------------------------------*/
   4634 /*--- STOREV64                                             ---*/
   4635 /*------------------------------------------------------------*/
   4636 
   4637 static INLINE
   4638 void mc_STOREV64 ( Addr a, ULong vbits64, Bool isBigEndian )
   4639 {
   4640    PROF_EVENT(MCPE_STOREV64);
   4641 
   4642 #ifndef PERF_FAST_STOREV
   4643    // XXX: this slow case seems to be marginally faster than the fast case!
   4644    // Investigate further.
   4645    mc_STOREVn_slow( a, 64, vbits64, isBigEndian );
   4646 #else
   4647    {
   4648       UWord   sm_off16, vabits16;
   4649       SecMap* sm;
   4650 
   4651       if (UNLIKELY( UNALIGNED_OR_HIGH(a,64) )) {
   4652          PROF_EVENT(MCPE_STOREV64_SLOW1);
   4653          mc_STOREVn_slow( a, 64, vbits64, isBigEndian );
   4654          return;
   4655       }
   4656 
   4657       sm       = get_secmap_for_reading_low(a);
   4658       sm_off16 = SM_OFF_16(a);
   4659       vabits16 = ((UShort*)(sm->vabits8))[sm_off16];
   4660 
   4661       // To understand the below cleverness, see the extensive comments
   4662       // in MC_(helperc_STOREV8).
   4663       if (LIKELY(V_BITS64_DEFINED == vbits64)) {
   4664          if (LIKELY(vabits16 == (UShort)VA_BITS16_DEFINED)) {
   4665             return;
   4666          }
   4667          if (!is_distinguished_sm(sm) && VA_BITS16_UNDEFINED == vabits16) {
   4668             ((UShort*)(sm->vabits8))[sm_off16] = (UShort)VA_BITS16_DEFINED;
   4669             return;
   4670          }
   4671          PROF_EVENT(MCPE_STOREV64_SLOW2);
   4672          mc_STOREVn_slow( a, 64, vbits64, isBigEndian );
   4673          return;
   4674       }
   4675       if (V_BITS64_UNDEFINED == vbits64) {
   4676          if (vabits16 == (UShort)VA_BITS16_UNDEFINED) {
   4677             return;
   4678          }
   4679          if (!is_distinguished_sm(sm) && VA_BITS16_DEFINED == vabits16) {
   4680             ((UShort*)(sm->vabits8))[sm_off16] = (UShort)VA_BITS16_UNDEFINED;
   4681             return;
   4682          }
   4683          PROF_EVENT(MCPE_STOREV64_SLOW3);
   4684          mc_STOREVn_slow( a, 64, vbits64, isBigEndian );
   4685          return;
   4686       }
   4687 
   4688       PROF_EVENT(MCPE_STOREV64_SLOW4);
   4689       mc_STOREVn_slow( a, 64, vbits64, isBigEndian );
   4690    }
   4691 #endif
   4692 }
   4693 
   4694 VG_REGPARM(1) void MC_(helperc_STOREV64be) ( Addr a, ULong vbits64 )
   4695 {
   4696    mc_STOREV64(a, vbits64, True);
   4697 }
   4698 VG_REGPARM(1) void MC_(helperc_STOREV64le) ( Addr a, ULong vbits64 )
   4699 {
   4700    mc_STOREV64(a, vbits64, False);
   4701 }
   4702 
   4703 /*------------------------------------------------------------*/
   4704 /*--- LOADV32                                              ---*/
   4705 /*------------------------------------------------------------*/
   4706 
   4707 static INLINE
   4708 UWord mc_LOADV32 ( Addr a, Bool isBigEndian )
   4709 {
   4710    PROF_EVENT(MCPE_LOADV32);
   4711 
   4712 #ifndef PERF_FAST_LOADV
   4713    return (UWord)mc_LOADVn_slow( a, 32, isBigEndian );
   4714 #else
   4715    {
   4716       UWord   sm_off, vabits8;
   4717       SecMap* sm;
   4718 
   4719       if (UNLIKELY( UNALIGNED_OR_HIGH(a,32) )) {
   4720          PROF_EVENT(MCPE_LOADV32_SLOW1);
   4721          return (UWord)mc_LOADVn_slow( a, 32, isBigEndian );
   4722       }
   4723 
   4724       sm      = get_secmap_for_reading_low(a);
   4725       sm_off  = SM_OFF(a);
   4726       vabits8 = sm->vabits8[sm_off];
   4727 
   4728       // Handle common case quickly: a is suitably aligned, is mapped, and the
   4729       // entire word32 it lives in is addressible.
   4730       // Convert V bits from compact memory form to expanded register form.
   4731       // For 64-bit platforms, set the high 32 bits of retval to 1 (undefined).
   4732       // Almost certainly not necessary, but be paranoid.
   4733       if (LIKELY(vabits8 == VA_BITS8_DEFINED)) {
   4734          return ((UWord)0xFFFFFFFF00000000ULL | (UWord)V_BITS32_DEFINED);
   4735       } else if (LIKELY(vabits8 == VA_BITS8_UNDEFINED)) {
   4736          return ((UWord)0xFFFFFFFF00000000ULL | (UWord)V_BITS32_UNDEFINED);
   4737       } else {
   4738          /* Slow case: the 4 bytes are not all-defined or all-undefined. */
   4739          PROF_EVENT(MCPE_LOADV32_SLOW2);
   4740          return (UWord)mc_LOADVn_slow( a, 32, isBigEndian );
   4741       }
   4742    }
   4743 #endif
   4744 }
   4745 
   4746 // Generic for all platforms
   4747 VG_REGPARM(1) UWord MC_(helperc_LOADV32be) ( Addr a )
   4748 {
   4749    return mc_LOADV32(a, True);
   4750 }
   4751 
   4752 // Non-generic assembly for arm32-linux
   4753 #if ENABLE_ASSEMBLY_HELPERS && defined(PERF_FAST_LOADV) \
   4754     && defined(VGP_arm_linux)
   4755 __asm__( /* Derived from NCode template */
   4756 ".text                                  \n"
   4757 ".align 2                               \n"
   4758 ".global vgMemCheck_helperc_LOADV32le   \n"
   4759 ".type   vgMemCheck_helperc_LOADV32le, %function \n"
   4760 "vgMemCheck_helperc_LOADV32le:          \n"
   4761 "      tst    r0, #3                    \n" // 1
   4762 "      movw   r3, #:lower16:primary_map \n" // 1
   4763 "      bne    .LLV32LEc4                \n" // 2  if misaligned
   4764 "      lsr    r2, r0, #16               \n" // 3
   4765 "      movt   r3, #:upper16:primary_map \n" // 3
   4766 "      ldr    r2, [r3, r2, lsl #2]      \n" // 4
   4767 "      uxth   r1, r0                    \n" // 4
   4768 "      ldrb   r1, [r2, r1, lsr #2]      \n" // 5
   4769 "      cmp    r1, #0xAA                 \n" // 6  0xAA == VA_BITS8_DEFINED
   4770 "      bne    .LLV32LEc0                \n" // 7  if !all_defined
   4771 "      mov    r0, #0x0                  \n" // 8  0x0 == V_BITS32_DEFINED
   4772 "      bx     lr                        \n" // 9
   4773 ".LLV32LEc0:                            \n"
   4774 "      cmp    r1, #0x55                 \n" // 0x55 == VA_BITS8_UNDEFINED
   4775 "      bne    .LLV32LEc4                \n" // if !all_undefined
   4776 "      mov    r0, #0xFFFFFFFF           \n" // 0xFFFFFFFF == V_BITS32_UNDEFINED
   4777 "      bx     lr                        \n"
   4778 ".LLV32LEc4:                            \n"
   4779 "      push   {r4, lr}                  \n"
   4780 "      mov    r2, #0                    \n"
   4781 "      mov    r1, #32                   \n"
   4782 "      bl     mc_LOADVn_slow            \n"
   4783 "      pop    {r4, pc}                  \n"
   4784 ".size vgMemCheck_helperc_LOADV32le, .-vgMemCheck_helperc_LOADV32le \n"
   4785 ".previous\n"
   4786 );
   4787 
   4788 #elif ENABLE_ASSEMBLY_HELPERS && defined(PERF_FAST_LOADV) \
   4789       && (defined(VGP_x86_linux) || defined(VGP_x86_solaris))
   4790 __asm__(
   4791 ".text\n"
   4792 ".align 16\n"
   4793 ".global vgMemCheck_helperc_LOADV32le\n"
   4794 ".type   vgMemCheck_helperc_LOADV32le, @function\n"
   4795 "vgMemCheck_helperc_LOADV32le:\n"
   4796 "      test   $0x3,  %eax\n"
   4797 "      jnz    .LLV32LE2\n"         /* jump if misaligned */
   4798 "      mov    %eax,  %edx\n"
   4799 "      shr    $16,   %edx\n"
   4800 "      mov    primary_map(,%edx,4), %ecx\n"
   4801 "      movzwl %ax,   %edx\n"
   4802 "      shr    $2,    %edx\n"
   4803 "      movzbl (%ecx,%edx,1), %edx\n"
   4804 "      cmp    $0xaa, %edx\n"       /* compare to VA_BITS8_DEFINED */
   4805 "      jne    .LLV32LE1\n"         /* jump if not completely defined */
   4806 "      xor    %eax,  %eax\n"       /* else return V_BITS32_DEFINED */
   4807 "      ret\n"
   4808 ".LLV32LE1:\n"
   4809 "      cmp    $0x55, %edx\n"       /* compare to VA_BITS8_UNDEFINED */
   4810 "      jne    .LLV32LE2\n"         /* jump if not completely undefined */
   4811 "      or     $0xffffffff, %eax\n" /* else return V_BITS32_UNDEFINED */
   4812 "      ret\n"
   4813 ".LLV32LE2:\n"
   4814 "      xor    %ecx,  %ecx\n"       /* tail call mc_LOADVn_slow(a, 32, 0) */
   4815 "      mov    $32,   %edx\n"
   4816 "      jmp    mc_LOADVn_slow\n"
   4817 ".size vgMemCheck_helperc_LOADV32le, .-vgMemCheck_helperc_LOADV32le\n"
   4818 ".previous\n"
   4819 );
   4820 
   4821 #else
   4822 // Generic for all platforms except {arm32,x86}-linux and x86-solaris
   4823 VG_REGPARM(1) UWord MC_(helperc_LOADV32le) ( Addr a )
   4824 {
   4825    return mc_LOADV32(a, False);
   4826 }
   4827 #endif
   4828 
   4829 /*------------------------------------------------------------*/
   4830 /*--- STOREV32                                             ---*/
   4831 /*------------------------------------------------------------*/
   4832 
   4833 static INLINE
   4834 void mc_STOREV32 ( Addr a, UWord vbits32, Bool isBigEndian )
   4835 {
   4836    PROF_EVENT(MCPE_STOREV32);
   4837 
   4838 #ifndef PERF_FAST_STOREV
   4839    mc_STOREVn_slow( a, 32, (ULong)vbits32, isBigEndian );
   4840 #else
   4841    {
   4842       UWord   sm_off, vabits8;
   4843       SecMap* sm;
   4844 
   4845       if (UNLIKELY( UNALIGNED_OR_HIGH(a,32) )) {
   4846          PROF_EVENT(MCPE_STOREV32_SLOW1);
   4847          mc_STOREVn_slow( a, 32, (ULong)vbits32, isBigEndian );
   4848          return;
   4849       }
   4850 
   4851       sm      = get_secmap_for_reading_low(a);
   4852       sm_off  = SM_OFF(a);
   4853       vabits8 = sm->vabits8[sm_off];
   4854 
   4855       // To understand the below cleverness, see the extensive comments
   4856       // in MC_(helperc_STOREV8).
   4857       if (LIKELY(V_BITS32_DEFINED == vbits32)) {
   4858          if (LIKELY(vabits8 == (UInt)VA_BITS8_DEFINED)) {
   4859             return;
   4860          }
   4861          if (!is_distinguished_sm(sm)  && VA_BITS8_UNDEFINED == vabits8) {
   4862             sm->vabits8[sm_off] = (UInt)VA_BITS8_DEFINED;
   4863             return;
   4864          }
   4865          PROF_EVENT(MCPE_STOREV32_SLOW2);
   4866          mc_STOREVn_slow( a, 32, (ULong)vbits32, isBigEndian );
   4867          return;
   4868       }
   4869       if (V_BITS32_UNDEFINED == vbits32) {
   4870          if (vabits8 == (UInt)VA_BITS8_UNDEFINED) {
   4871             return;
   4872          }
   4873          if (!is_distinguished_sm(sm) && VA_BITS8_DEFINED == vabits8) {
   4874             sm->vabits8[sm_off] = (UInt)VA_BITS8_UNDEFINED;
   4875             return;
   4876          }
   4877          PROF_EVENT(MCPE_STOREV32_SLOW3);
   4878          mc_STOREVn_slow( a, 32, (ULong)vbits32, isBigEndian );
   4879          return;
   4880       }
   4881 
   4882       PROF_EVENT(MCPE_STOREV32_SLOW4);
   4883       mc_STOREVn_slow( a, 32, (ULong)vbits32, isBigEndian );
   4884    }
   4885 #endif
   4886 }
   4887 
   4888 VG_REGPARM(2) void MC_(helperc_STOREV32be) ( Addr a, UWord vbits32 )
   4889 {
   4890    mc_STOREV32(a, vbits32, True);
   4891 }
   4892 VG_REGPARM(2) void MC_(helperc_STOREV32le) ( Addr a, UWord vbits32 )
   4893 {
   4894    mc_STOREV32(a, vbits32, False);
   4895 }
   4896 
   4897 /*------------------------------------------------------------*/
   4898 /*--- LOADV16                                              ---*/
   4899 /*------------------------------------------------------------*/
   4900 
   4901 static INLINE
   4902 UWord mc_LOADV16 ( Addr a, Bool isBigEndian )
   4903 {
   4904    PROF_EVENT(MCPE_LOADV16);
   4905 
   4906 #ifndef PERF_FAST_LOADV
   4907    return (UWord)mc_LOADVn_slow( a, 16, isBigEndian );
   4908 #else
   4909    {
   4910       UWord   sm_off, vabits8;
   4911       SecMap* sm;
   4912 
   4913       if (UNLIKELY( UNALIGNED_OR_HIGH(a,16) )) {
   4914          PROF_EVENT(MCPE_LOADV16_SLOW1);
   4915          return (UWord)mc_LOADVn_slow( a, 16, isBigEndian );
   4916       }
   4917 
   4918       sm      = get_secmap_for_reading_low(a);
   4919       sm_off  = SM_OFF(a);
   4920       vabits8 = sm->vabits8[sm_off];
   4921       // Handle common case quickly: a is suitably aligned, is mapped, and is
   4922       // addressible.
   4923       // Convert V bits from compact memory form to expanded register form
   4924       if      (LIKELY(vabits8 == VA_BITS8_DEFINED  )) { return V_BITS16_DEFINED;   }
   4925       else if (LIKELY(vabits8 == VA_BITS8_UNDEFINED)) { return V_BITS16_UNDEFINED; }
   4926       else {
   4927          // The 4 (yes, 4) bytes are not all-defined or all-undefined, check
   4928          // the two sub-bytes.
   4929          UChar vabits4 = extract_vabits4_from_vabits8(a, vabits8);
   4930          if      (vabits4 == VA_BITS4_DEFINED  ) { return V_BITS16_DEFINED;   }
   4931          else if (vabits4 == VA_BITS4_UNDEFINED) { return V_BITS16_UNDEFINED; }
   4932          else {
   4933             /* Slow case: the two bytes are not all-defined or all-undefined. */
   4934             PROF_EVENT(MCPE_LOADV16_SLOW2);
   4935             return (UWord)mc_LOADVn_slow( a, 16, isBigEndian );
   4936          }
   4937       }
   4938    }
   4939 #endif
   4940 }
   4941 
   4942 // Generic for all platforms
   4943 VG_REGPARM(1) UWord MC_(helperc_LOADV16be) ( Addr a )
   4944 {
   4945    return mc_LOADV16(a, True);
   4946 }
   4947 
   4948 // Non-generic assembly for arm32-linux
   4949 #if ENABLE_ASSEMBLY_HELPERS && defined(PERF_FAST_LOADV) \
   4950     && defined(VGP_arm_linux)
   4951 __asm__( /* Derived from NCode template */
   4952 ".text                                  \n"
   4953 ".align 2                               \n"
   4954 ".global vgMemCheck_helperc_LOADV16le   \n"
   4955 ".type   vgMemCheck_helperc_LOADV16le, %function \n"
   4956 "vgMemCheck_helperc_LOADV16le:          \n" //
   4957 "      tst    r0, #1                    \n" //
   4958 "      bne    .LLV16LEc12               \n" // if misaligned
   4959 "      lsr    r2, r0, #16               \n" // r2 = pri-map-ix
   4960 "      movw   r3, #:lower16:primary_map \n" //
   4961 "      uxth   r1, r0                    \n" // r1 = sec-map-offB
   4962 "      movt   r3, #:upper16:primary_map \n" //
   4963 "      ldr    r2, [r3, r2, lsl #2]      \n" // r2 = sec-map
   4964 "      ldrb   r1, [r2, r1, lsr #2]      \n" // r1 = sec-map-VABITS8
   4965 "      cmp    r1, #0xAA                 \n" // r1 == VA_BITS8_DEFINED?
   4966 "      bne    .LLV16LEc0                \n" // no, goto .LLV16LEc0
   4967 ".LLV16LEh9:                            \n" //
   4968 "      mov    r0, #0xFFFFFFFF           \n" //
   4969 "      lsl    r0, r0, #16               \n" // V_BITS16_DEFINED | top16safe
   4970 "      bx     lr                        \n" //
   4971 ".LLV16LEc0:                            \n" //
   4972 "      cmp    r1, #0x55                 \n" // VA_BITS8_UNDEFINED
   4973 "      bne    .LLV16LEc4                \n" //
   4974 ".LLV16LEc2:                            \n" //
   4975 "      mov    r0, #0xFFFFFFFF           \n" // V_BITS16_UNDEFINED | top16safe
   4976 "      bx     lr                        \n" //
   4977 ".LLV16LEc4:                            \n" //
   4978        // r1 holds sec-map-VABITS8.  r0 holds the address and is 2-aligned.
   4979        // Extract the relevant 4 bits and inspect.
   4980 "      and    r2, r0, #2       \n" // addr & 2
   4981 "      add    r2, r2, r2       \n" // 2 * (addr & 2)
   4982 "      lsr    r1, r1, r2       \n" // sec-map-VABITS8 >> (2 * (addr & 2))
   4983 "      and    r1, r1, #15      \n" // (sec-map-VABITS8 >> (2 * (addr & 2))) & 15
   4984 
   4985 "      cmp    r1, #0xA                  \n" // VA_BITS4_DEFINED
   4986 "      beq    .LLV16LEh9                \n" //
   4987 
   4988 "      cmp    r1, #0x5                  \n" // VA_BITS4_UNDEFINED
   4989 "      beq    .LLV16LEc2                \n" //
   4990 
   4991 ".LLV16LEc12:                           \n" //
   4992 "      push   {r4, lr}                  \n" //
   4993 "      mov    r2, #0                    \n" //
   4994 "      mov    r1, #16                   \n" //
   4995 "      bl     mc_LOADVn_slow            \n" //
   4996 "      pop    {r4, pc}                  \n" //
   4997 ".size vgMemCheck_helperc_LOADV16le, .-vgMemCheck_helperc_LOADV16le \n"
   4998 ".previous\n"
   4999 );
   5000 
   5001 #elif ENABLE_ASSEMBLY_HELPERS && defined(PERF_FAST_LOADV) \
   5002       && (defined(VGP_x86_linux) || defined(VGP_x86_solaris))
   5003 __asm__(
   5004 ".text\n"
   5005 ".align 16\n"
   5006 ".global vgMemCheck_helperc_LOADV16le\n"
   5007 ".type   vgMemCheck_helperc_LOADV16le, @function\n"
   5008 "vgMemCheck_helperc_LOADV16le:\n"
   5009 "      test   $0x1,  %eax\n"
   5010 "      jne    .LLV16LE5\n"          /* jump if not aligned */
   5011 "      mov    %eax,  %edx\n"
   5012 "      shr    $0x10, %edx\n"
   5013 "      mov    primary_map(,%edx,4), %ecx\n"
   5014 "      movzwl %ax,   %edx\n"
   5015 "      shr    $0x2,  %edx\n"
   5016 "      movzbl (%ecx,%edx,1), %edx\n"/* edx = VA bits for 32bit */
   5017 "      cmp    $0xaa, %edx\n"        /* compare to VA_BITS8_DEFINED */
   5018 "      jne    .LLV16LE2\n"          /* jump if not all 32bits defined */
   5019 ".LLV16LE1:\n"
   5020 "      mov    $0xffff0000,%eax\n"   /* V_BITS16_DEFINED | top16safe */
   5021 "      ret\n"
   5022 ".LLV16LE2:\n"
   5023 "      cmp    $0x55, %edx\n"        /* compare to VA_BITS8_UNDEFINED */
   5024 "      jne    .LLV16LE4\n"          /* jump if not all 32bits undefined */
   5025 ".LLV16LE3:\n"
   5026 "      or     $0xffffffff,%eax\n"   /* V_BITS16_UNDEFINED | top16safe */
   5027 "      ret\n"
   5028 ".LLV16LE4:\n"
   5029 "      mov    %eax,  %ecx\n"
   5030 "      and    $0x2,  %ecx\n"
   5031 "      add    %ecx,  %ecx\n"
   5032 "      sar    %cl,   %edx\n"
   5033 "      and    $0xf,  %edx\n"
   5034 "      cmp    $0xa,  %edx\n"
   5035 "      je     .LLV16LE1\n"          /* jump if all 16bits are defined */
   5036 "      cmp    $0x5,  %edx\n"
   5037 "      je     .LLV16LE3\n"          /* jump if all 16bits are undefined */
   5038 ".LLV16LE5:\n"
   5039 "      xor    %ecx,  %ecx\n"        /* tail call mc_LOADVn_slow(a, 16, 0) */
   5040 "      mov    $16,   %edx\n"
   5041 "      jmp    mc_LOADVn_slow\n"
   5042 ".size vgMemCheck_helperc_LOADV16le, .-vgMemCheck_helperc_LOADV16le \n"
   5043 ".previous\n"
   5044 );
   5045 
   5046 #else
   5047 // Generic for all platforms except {arm32,x86}-linux and x86-solaris
   5048 VG_REGPARM(1) UWord MC_(helperc_LOADV16le) ( Addr a )
   5049 {
   5050    return mc_LOADV16(a, False);
   5051 }
   5052 #endif
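         /* Editor's note: the assembly helpers above hand-code the same
            two-level shadow-map lookup that get_secmap_for_reading_low() and
            SM_OFF() perform in the C fast paths.  The sketch below restates
            that lookup in C, following the assembly comments (primary-map
            index = address >> 16, secondary-map byte index =
            (address & 0xFFFF) >> 2).  It is only an illustration of the
            addressing scheme for "low" addresses covered by primary_map, not
            a replacement for the real accessors. */
         #if 0
         static UChar sketch_vabits8_for_low_addr ( Addr a )
         {
            SecMap* sm   = primary_map[a >> 16];   /* which 64KB chunk    */
            UWord   offB = (a & 0xFFFF) >> 2;      /* which packed byte   */
            return sm->vabits8[offB];              /* 4 x 2-bit VA fields */
         }
         #endif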
   5053 
   5054 /*------------------------------------------------------------*/
   5055 /*--- STOREV16                                             ---*/
   5056 /*------------------------------------------------------------*/
   5057 
   5058 /* True if the vabits4 in vabits8 indicate a and a+1 are accessible. */
   5059 static INLINE
   5060 Bool accessible_vabits4_in_vabits8 ( Addr a, UChar vabits8 )
   5061 {
   5062    UInt shift;
   5063    tl_assert(VG_IS_2_ALIGNED(a));      // Must be 2-aligned
   5064    shift = (a & 2) << 1;               // shift by 0 or 4
   5065    vabits8 >>= shift;                  // shift the four bits to the bottom
    5066    // check 2 x vabits2 != VA_BITS2_NOACCESS
   5067    return ((0x3 & vabits8) != VA_BITS2_NOACCESS)
   5068       &&  ((0xc & vabits8) != VA_BITS2_NOACCESS << 2);
   5069 }
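         /* Editor's worked example (illustrative): for a 2-aligned address
            with (a & 2) == 0 and vabits8 == 0xA6, the shift is 0 and the low
            nibble 0x6 (binary 01 10) is inspected.  (0x3 & 0x6) == 2, the
            encoding the assembly helpers label VA_BITS2_DEFINED, and
            (0xc & 0x6) == 4 == 1 << 2, i.e. VA_BITS2_UNDEFINED for the
            second byte.  Neither 2-bit field is VA_BITS2_NOACCESS, so both
            bytes are accessible and the function returns True. */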
   5070 
   5071 static INLINE
   5072 void mc_STOREV16 ( Addr a, UWord vbits16, Bool isBigEndian )
   5073 {
   5074    PROF_EVENT(MCPE_STOREV16);
   5075 
   5076 #ifndef PERF_FAST_STOREV
   5077    mc_STOREVn_slow( a, 16, (ULong)vbits16, isBigEndian );
   5078 #else
   5079    {
   5080       UWord   sm_off, vabits8;
   5081       SecMap* sm;
   5082 
   5083       if (UNLIKELY( UNALIGNED_OR_HIGH(a,16) )) {
   5084          PROF_EVENT(MCPE_STOREV16_SLOW1);
   5085          mc_STOREVn_slow( a, 16, (ULong)vbits16, isBigEndian );
   5086          return;
   5087       }
   5088 
   5089       sm      = get_secmap_for_reading_low(a);
   5090       sm_off  = SM_OFF(a);
   5091       vabits8 = sm->vabits8[sm_off];
   5092 
   5093       // To understand the below cleverness, see the extensive comments
   5094       // in MC_(helperc_STOREV8).
   5095       if (LIKELY(V_BITS16_DEFINED == vbits16)) {
   5096          if (LIKELY(vabits8 == VA_BITS8_DEFINED)) {
   5097             return;
   5098          }
   5099          if (!is_distinguished_sm(sm)
   5100              && accessible_vabits4_in_vabits8(a, vabits8)) {
   5101             insert_vabits4_into_vabits8( a, VA_BITS4_DEFINED,
   5102                                          &(sm->vabits8[sm_off]) );
   5103             return;
   5104          }
    5105          PROF_EVENT(MCPE_STOREV16_SLOW2);
    5106          mc_STOREVn_slow( a, 16, (ULong)vbits16, isBigEndian );
                  return;
    5107       }
   5108       if (V_BITS16_UNDEFINED == vbits16) {
   5109          if (vabits8 == VA_BITS8_UNDEFINED) {
   5110             return;
   5111          }
   5112          if (!is_distinguished_sm(sm)
   5113              && accessible_vabits4_in_vabits8(a, vabits8)) {
   5114             insert_vabits4_into_vabits8( a, VA_BITS4_UNDEFINED,
   5115                                          &(sm->vabits8[sm_off]) );
   5116             return;
   5117          }
   5118          PROF_EVENT(MCPE_STOREV16_SLOW3);
   5119          mc_STOREVn_slow( a, 16, (ULong)vbits16, isBigEndian );
   5120          return;
   5121       }
   5122 
   5123       PROF_EVENT(MCPE_STOREV16_SLOW4);
   5124       mc_STOREVn_slow( a, 16, (ULong)vbits16, isBigEndian );
   5125    }
   5126 #endif
   5127 }
   5128 
   5129 
   5130 VG_REGPARM(2) void MC_(helperc_STOREV16be) ( Addr a, UWord vbits16 )
   5131 {
   5132    mc_STOREV16(a, vbits16, True);
   5133 }
   5134 VG_REGPARM(2) void MC_(helperc_STOREV16le) ( Addr a, UWord vbits16 )
   5135 {
   5136    mc_STOREV16(a, vbits16, False);
   5137 }
   5138 
   5139 /*------------------------------------------------------------*/
   5140 /*--- LOADV8                                               ---*/
   5141 /*------------------------------------------------------------*/
   5142 
   5143 /* Note: endianness is irrelevant for size == 1 */
   5144 
   5145 // Non-generic assembly for arm32-linux
   5146 #if ENABLE_ASSEMBLY_HELPERS && defined(PERF_FAST_LOADV) \
   5147     && defined(VGP_arm_linux)
   5148 __asm__( /* Derived from NCode template */
   5149 ".text                                  \n"
   5150 ".align 2                               \n"
   5151 ".global vgMemCheck_helperc_LOADV8      \n"
   5152 ".type   vgMemCheck_helperc_LOADV8, %function \n"
   5153 "vgMemCheck_helperc_LOADV8:             \n" //
   5154 "      lsr    r2, r0, #16               \n" // r2 = pri-map-ix
   5155 "      movw   r3, #:lower16:primary_map \n" //
   5156 "      uxth   r1, r0                    \n" // r1 = sec-map-offB
   5157 "      movt   r3, #:upper16:primary_map \n" //
   5158 "      ldr    r2, [r3, r2, lsl #2]      \n" // r2 = sec-map
   5159 "      ldrb   r1, [r2, r1, lsr #2]      \n" // r1 = sec-map-VABITS8
   5160 "      cmp    r1, #0xAA                 \n" // r1 == VA_BITS8_DEFINED?
   5161 "      bne    .LLV8c0                   \n" // no, goto .LLV8c0
   5162 ".LLV8h9:                               \n" //
   5163 "      mov    r0, #0xFFFFFF00           \n" // V_BITS8_DEFINED | top24safe
   5164 "      bx     lr                        \n" //
   5165 ".LLV8c0:                               \n" //
   5166 "      cmp    r1, #0x55                 \n" // VA_BITS8_UNDEFINED
   5167 "      bne    .LLV8c4                   \n" //
   5168 ".LLV8c2:                               \n" //
   5169 "      mov    r0, #0xFFFFFFFF           \n" // V_BITS8_UNDEFINED | top24safe
   5170 "      bx     lr                        \n" //
   5171 ".LLV8c4:                               \n" //
   5172        // r1 holds sec-map-VABITS8
   5173        // r0 holds the address.  Extract the relevant 2 bits and inspect.
   5174 "      and    r2, r0, #3       \n" // addr & 3
   5175 "      add    r2, r2, r2       \n" // 2 * (addr & 3)
   5176 "      lsr    r1, r1, r2       \n" // sec-map-VABITS8 >> (2 * (addr & 3))
   5177 "      and    r1, r1, #3       \n" // (sec-map-VABITS8 >> (2 * (addr & 3))) & 3
   5178 
   5179 "      cmp    r1, #2                    \n" // VA_BITS2_DEFINED
   5180 "      beq    .LLV8h9                   \n" //
   5181 
   5182 "      cmp    r1, #1                    \n" // VA_BITS2_UNDEFINED
   5183 "      beq    .LLV8c2                   \n" //
   5184 
   5185 "      push   {r4, lr}                  \n" //
   5186 "      mov    r2, #0                    \n" //
   5187 "      mov    r1, #8                    \n" //
   5188 "      bl     mc_LOADVn_slow            \n" //
   5189 "      pop    {r4, pc}                  \n" //
   5190 ".size vgMemCheck_helperc_LOADV8, .-vgMemCheck_helperc_LOADV8 \n"
   5191 ".previous\n"
   5192 );
   5193 
   5194 /* Non-generic assembly for x86-linux */
   5195 #elif ENABLE_ASSEMBLY_HELPERS && defined(PERF_FAST_LOADV) \
   5196       && (defined(VGP_x86_linux) || defined(VGP_x86_solaris))
   5197 __asm__(
   5198 ".text\n"
   5199 ".align 16\n"
   5200 ".global vgMemCheck_helperc_LOADV8\n"
   5201 ".type   vgMemCheck_helperc_LOADV8, @function\n"
   5202 "vgMemCheck_helperc_LOADV8:\n"
   5203 "      mov    %eax,  %edx\n"
   5204 "      shr    $0x10, %edx\n"
   5205 "      mov    primary_map(,%edx,4), %ecx\n"
   5206 "      movzwl %ax,   %edx\n"
   5207 "      shr    $0x2,  %edx\n"
   5208 "      movzbl (%ecx,%edx,1), %edx\n"/* edx = VA bits for 32bit */
   5209 "      cmp    $0xaa, %edx\n"        /* compare to VA_BITS8_DEFINED? */
   5210 "      jne    .LLV8LE2\n"           /* jump if not defined */
   5211 ".LLV8LE1:\n"
   5212 "      mov    $0xffffff00, %eax\n"  /* V_BITS8_DEFINED | top24safe */
   5213 "      ret\n"
   5214 ".LLV8LE2:\n"
   5215 "      cmp    $0x55, %edx\n"        /* compare to VA_BITS8_UNDEFINED */
   5216 "      jne    .LLV8LE4\n"           /* jump if not all 32bits are undefined */
   5217 ".LLV8LE3:\n"
   5218 "      or     $0xffffffff, %eax\n"  /* V_BITS8_UNDEFINED | top24safe */
   5219 "      ret\n"
   5220 ".LLV8LE4:\n"
   5221 "      mov    %eax,  %ecx\n"
   5222 "      and    $0x3,  %ecx\n"
   5223 "      add    %ecx,  %ecx\n"
   5224 "      sar    %cl,   %edx\n"
   5225 "      and    $0x3,  %edx\n"
   5226 "      cmp    $0x2,  %edx\n"
   5227 "      je     .LLV8LE1\n"           /* jump if all 8bits are defined */
   5228 "      cmp    $0x1,  %edx\n"
   5229 "      je     .LLV8LE3\n"           /* jump if all 8bits are undefined */
   5230 "      xor    %ecx,  %ecx\n"        /* tail call to mc_LOADVn_slow(a, 8, 0) */
   5231 "      mov    $0x8,  %edx\n"
   5232 "      jmp    mc_LOADVn_slow\n"
   5233 ".size vgMemCheck_helperc_LOADV8, .-vgMemCheck_helperc_LOADV8\n"
   5234 ".previous\n"
   5235 );
   5236 
   5237 #else
   5238 // Generic for all platforms except {arm32,x86}-linux and x86-solaris
   5239 VG_REGPARM(1)
   5240 UWord MC_(helperc_LOADV8) ( Addr a )
   5241 {
   5242    PROF_EVENT(MCPE_LOADV8);
   5243 
   5244 #ifndef PERF_FAST_LOADV
   5245    return (UWord)mc_LOADVn_slow( a, 8, False/*irrelevant*/ );
   5246 #else
   5247    {
   5248       UWord   sm_off, vabits8;
   5249       SecMap* sm;
   5250 
   5251       if (UNLIKELY( UNALIGNED_OR_HIGH(a,8) )) {
   5252          PROF_EVENT(MCPE_LOADV8_SLOW1);
   5253          return (UWord)mc_LOADVn_slow( a, 8, False/*irrelevant*/ );
   5254       }
   5255 
   5256       sm      = get_secmap_for_reading_low(a);
   5257       sm_off  = SM_OFF(a);
   5258       vabits8 = sm->vabits8[sm_off];
   5259       // Convert V bits from compact memory form to expanded register form
   5260       // Handle common case quickly: a is mapped, and the entire
    5261       // word32 it lives in is addressable.
   5262       if      (LIKELY(vabits8 == VA_BITS8_DEFINED  )) { return V_BITS8_DEFINED;   }
   5263       else if (LIKELY(vabits8 == VA_BITS8_UNDEFINED)) { return V_BITS8_UNDEFINED; }
   5264       else {
   5265          // The 4 (yes, 4) bytes are not all-defined or all-undefined, check
   5266          // the single byte.
   5267          UChar vabits2 = extract_vabits2_from_vabits8(a, vabits8);
   5268          if      (vabits2 == VA_BITS2_DEFINED  ) { return V_BITS8_DEFINED;   }
   5269          else if (vabits2 == VA_BITS2_UNDEFINED) { return V_BITS8_UNDEFINED; }
   5270          else {
   5271             /* Slow case: the byte is not all-defined or all-undefined. */
   5272             PROF_EVENT(MCPE_LOADV8_SLOW2);
   5273             return (UWord)mc_LOADVn_slow( a, 8, False/*irrelevant*/ );
   5274          }
   5275       }
   5276    }
   5277 #endif
   5278 }
   5279 #endif
   5280 
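         /* Editor's illustrative sketch: the 8-bit fast path above (and its
            assembly versions) select a single 2-bit VA field from the packed
            vabits8 byte.  This restates the selection in C, following the
            assembly comments (shift = 2 * (addr & 3), mask = 3); it is a
            paraphrase of what extract_vabits2_from_vabits8() does, not the
            actual implementation used in this file. */
         #if 0
         static UChar sketch_vabits2_for_byte ( Addr a, UChar vabits8 )
         {
            UInt shift = (a & 3) << 1;       /* 0, 2, 4 or 6            */
            return (vabits8 >> shift) & 3;   /* one VA_BITS2_* encoding */
         }
         #endif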
   5281 /*------------------------------------------------------------*/
   5282 /*--- STOREV8                                              ---*/
   5283 /*------------------------------------------------------------*/
   5284 
   5285 VG_REGPARM(2)
   5286 void MC_(helperc_STOREV8) ( Addr a, UWord vbits8 )
   5287 {
   5288    PROF_EVENT(MCPE_STOREV8);
   5289 
   5290 #ifndef PERF_FAST_STOREV
   5291    mc_STOREVn_slow( a, 8, (ULong)vbits8, False/*irrelevant*/ );
   5292 #else
   5293    {
   5294       UWord   sm_off, vabits8;
   5295       SecMap* sm;
   5296 
   5297       if (UNLIKELY( UNALIGNED_OR_HIGH(a,8) )) {
   5298          PROF_EVENT(MCPE_STOREV8_SLOW1);
   5299          mc_STOREVn_slow( a, 8, (ULong)vbits8, False/*irrelevant*/ );
   5300          return;
   5301       }
   5302 
   5303       sm      = get_secmap_for_reading_low(a);
   5304       sm_off  = SM_OFF(a);
   5305       vabits8 = sm->vabits8[sm_off];
   5306 
   5307       // Clevernesses to speed up storing V bits.
   5308       // The 64/32/16 bit cases also have similar clevernesses, but it
   5309       // works a little differently to the code below.
   5310       //
   5311       // Cleverness 1:  sometimes we don't have to write the shadow memory at
   5312       // all, if we can tell that what we want to write is the same as what is
   5313       // already there. These cases are marked below as "defined on defined" and
   5314       // "undefined on undefined".
   5315       //
   5316       // Cleverness 2:
    5317       // We also avoid calling mc_STOREVn_slow if the V bits can be
    5318       // written directly in the secondary map.  V bits can be written
    5319       // directly if 4 conditions are met:
   5320       //   * The address for which V bits are written is naturally aligned
   5321       //        on 1 byte  for STOREV8 (this is always true)
   5322       //        on 2 bytes for STOREV16
   5323       //        on 4 bytes for STOREV32
   5324       //        on 8 bytes for STOREV64.
   5325       //   * V bits being written are either fully defined or fully undefined.
   5326       //     (for partially defined V bits, V bits cannot be directly written,
   5327       //      as the secondary vbits table must be maintained).
   5328       //   * the secmap is not distinguished (distinguished maps cannot be
   5329       //     modified).
   5330       //   * the memory corresponding to the V bits being written is
   5331       //     accessible (if one or more bytes are not accessible,
   5332       //     we must call mc_STOREVn_slow in order to report accessibility
   5333       //     errors).
   5334       //     Note that for STOREV32 and STOREV64, it is too expensive
   5335       //     to verify the accessibility of each byte for the benefit it
   5336       //     brings. Instead, a quicker check is done by comparing to
   5337       //     VA_BITS(8|16)_(UN)DEFINED. This guarantees accessibility,
    5338       //     but misses some opportunities for direct modification.
    5339       //     Checking the accessibility of each byte was measured for
    5340       //     STOREV32 with the perf tests, and it slowed down all of them.
   5341       // The cases corresponding to cleverness 2 are marked below as
   5342       // "direct mod".
   5343       if (LIKELY(V_BITS8_DEFINED == vbits8)) {
   5344          if (LIKELY(vabits8 == VA_BITS8_DEFINED)) {
   5345             return; // defined on defined
   5346          }
   5347          if (!is_distinguished_sm(sm)
   5348              && VA_BITS2_NOACCESS != extract_vabits2_from_vabits8(a, vabits8)) {
   5349             // direct mod
   5350             insert_vabits2_into_vabits8( a, VA_BITS2_DEFINED,
   5351                                          &(sm->vabits8[sm_off]) );
   5352             return;
   5353          }
   5354          PROF_EVENT(MCPE_STOREV8_SLOW2);
   5355          mc_STOREVn_slow( a, 8, (ULong)vbits8, False/*irrelevant*/ );
   5356          return;
   5357       }
   5358       if (V_BITS8_UNDEFINED == vbits8) {
   5359          if (vabits8 == VA_BITS8_UNDEFINED) {
   5360             return; // undefined on undefined
   5361          }
   5362          if (!is_distinguished_sm(sm)
   5363              && (VA_BITS2_NOACCESS
   5364                  != extract_vabits2_from_vabits8(a, vabits8))) {
   5365             // direct mod
   5366             insert_vabits2_into_vabits8( a, VA_BITS2_UNDEFINED,
   5367                                          &(sm->vabits8[sm_off]) );
   5368             return;
   5369          }
   5370          PROF_EVENT(MCPE_STOREV8_SLOW3);
   5371          mc_STOREVn_slow( a, 8, (ULong)vbits8, False/*irrelevant*/ );
   5372          return;
   5373       }
   5374 
   5375       // Partially defined word
   5376       PROF_EVENT(MCPE_STOREV8_SLOW4);
   5377       mc_STOREVn_slow( a, 8, (ULong)vbits8, False/*irrelevant*/ );
   5378    }
   5379 #endif
   5380 }
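         /* Editor's illustrative sketch: the four "direct mod" conditions
            listed in the comment inside MC_(helperc_STOREV8), folded into a
            single predicate for the 8-bit case.  The alignment condition is
            omitted because a single byte is always naturally aligned; the
            helper name is hypothetical, and the real code inlines these
            tests rather than calling a function. */
         #if 0
         static Bool sketch_storev8_can_direct_mod ( SecMap* sm, Addr a,
                                                     UWord vbits8,
                                                     UWord vabits8 )
         {
            return (vbits8 == V_BITS8_DEFINED || vbits8 == V_BITS8_UNDEFINED)
                   && !is_distinguished_sm(sm)
                   && VA_BITS2_NOACCESS
                      != extract_vabits2_from_vabits8(a, vabits8);
         }
         #endif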
   5381 
   5382 
   5383 /*------------------------------------------------------------*/
   5384 /*--- Functions called directly from generated code:       ---*/
   5385 /*--- Value-check failure handlers.                        ---*/
   5386 /*------------------------------------------------------------*/
   5387 
   5388 /* Call these ones when an origin is available ... */
   5389 VG_REGPARM(1)
   5390 void MC_(helperc_value_check0_fail_w_o) ( UWord origin ) {
   5391    MC_(record_cond_error) ( VG_(get_running_tid)(), (UInt)origin );
   5392 }
   5393 
   5394 VG_REGPARM(1)
   5395 void MC_(helperc_value_check1_fail_w_o) ( UWord origin ) {
   5396    MC_(record_value_error) ( VG_(get_running_tid)(), 1, (UInt)origin );
   5397 }
   5398 
   5399 VG_REGPARM(1)
   5400 void MC_(helperc_value_check4_fail_w_o) ( UWord origin ) {
   5401    MC_(record_value_error) ( VG_(get_running_tid)(), 4, (UInt)origin );
   5402 }
   5403 
   5404 VG_REGPARM(1)
   5405 void MC_(helperc_value_check8_fail_w_o) ( UWord origin ) {
   5406    MC_(record_value_error) ( VG_(get_running_tid)(), 8, (UInt)origin );
   5407 }
   5408 
   5409 VG_REGPARM(2)
   5410 void MC_(helperc_value_checkN_fail_w_o) ( HWord sz, UWord origin ) {
   5411    MC_(record_value_error) ( VG_(get_running_tid)(), (Int)sz, (UInt)origin );
   5412 }
   5413 
   5414 /* ... and these when an origin isn't available. */
   5415 
   5416 VG_REGPARM(0)
   5417 void MC_(helperc_value_check0_fail_no_o) ( void ) {
   5418    MC_(record_cond_error) ( VG_(get_running_tid)(), 0/*origin*/ );
   5419 }
   5420 
   5421 VG_REGPARM(0)
   5422 void MC_(helperc_value_check1_fail_no_o) ( void ) {
   5423    MC_(record_value_error) ( VG_(get_running_tid)(), 1, 0/*origin*/ );
   5424 }
   5425 
   5426 VG_REGPARM(0)
   5427 void MC_(helperc_value_check4_fail_no_o) ( void ) {
   5428    MC_(record_value_error) ( VG_(get_running_tid)(), 4, 0/*origin*/ );
   5429 }
   5430 
   5431 VG_REGPARM(0)
   5432 void MC_(helperc_value_check8_fail_no_o) ( void ) {
   5433    MC_(record_value_error) ( VG_(get_running_tid)(), 8, 0/*origin*/ );
   5434 }
   5435 
   5436 VG_REGPARM(1)
   5437 void MC_(helperc_value_checkN_fail_no_o) ( HWord sz ) {
   5438    MC_(record_value_error) ( VG_(get_running_tid)(), (Int)sz, 0/*origin*/ );
   5439 }
   5440 
   5441 
   5442 /*------------------------------------------------------------*/
   5443 /*--- Metadata get/set functions, for client requests.     ---*/
   5444 /*------------------------------------------------------------*/
   5445 
   5446 // Nb: this expands the V+A bits out into register-form V bits, even though
   5447 // they're in memory.  This is for backward compatibility, and because it's
   5448 // probably what the user wants.
   5449 
   5450 /* Copy Vbits from/to address 'a'. Returns: 1 == OK, 2 == alignment
   5451    error [no longer used], 3 == addressing error. */
   5452 /* Nb: We used to issue various definedness/addressability errors from here,
   5453    but we took them out because they ranged from not-very-helpful to
   5454    downright annoying, and they complicated the error data structures. */
   5455 static Int mc_get_or_set_vbits_for_client (
   5456    Addr a,
   5457    Addr vbits,
   5458    SizeT szB,
   5459    Bool setting, /* True <=> set vbits,  False <=> get vbits */
   5460    Bool is_client_request /* True <=> real user request
   5461                              False <=> internal call from gdbserver */
   5462 )
   5463 {
   5464    SizeT i;
   5465    Bool  ok;
   5466    UChar vbits8;
   5467 
    5468    /* Check that the arrays are addressable before doing any getting/setting.
    5469       vbits is checked only for a real user request. */
   5470    for (i = 0; i < szB; i++) {
   5471       if (VA_BITS2_NOACCESS == get_vabits2(a + i) ||
   5472           (is_client_request && VA_BITS2_NOACCESS == get_vabits2(vbits + i))) {
   5473          return 3;
   5474       }
   5475    }
   5476 
   5477    /* Do the copy */
   5478    if (setting) {
   5479       /* setting */
   5480       for (i = 0; i < szB; i++) {
   5481          ok = set_vbits8(a + i, ((UChar*)vbits)[i]);
   5482          tl_assert(ok);
   5483       }
   5484    } else {
   5485       /* getting */
   5486       for (i = 0; i < szB; i++) {
   5487          ok = get_vbits8(a + i, &vbits8);
   5488          tl_assert(ok);
   5489          ((UChar*)vbits)[i] = vbits8;
   5490       }
   5491       if (is_client_request)
   5492         // The bytes in vbits[] have now been set, so mark them as such.
   5493         MC_(make_mem_defined)(vbits, szB);
   5494    }
   5495 
   5496    return 1;
   5497 }
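         /* Editor's note: this function also backs the V-bits client requests
            declared in memcheck.h.  A minimal client-side sketch, assuming
            the VALGRIND_GET_VBITS(addr, vbits, nbytes) macro from memcheck.h;
            'obj' is a hypothetical client variable.  The 1/3 return values
            follow the convention documented above, and each output byte is a
            register-form V-bits byte (0x00 = fully defined, 0xFF = fully
            undefined, per the asserts in init_shadow_memory below). */
         #if 0
            {
               int           obj = 42;               /* hypothetical object */
               unsigned char vbits_buf[sizeof obj];
               unsigned      res = VALGRIND_GET_VBITS(&obj, vbits_buf,
                                                      sizeof obj);
               if (res == 3) {
                  /* part of obj or vbits_buf was unaddressable */
               } else if (res == 1) {
                  /* vbits_buf[i] now holds the V bits of byte i of obj */
               }
            }
         #endif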
   5498 
   5499 
   5500 /*------------------------------------------------------------*/
   5501 /*--- Detecting leaked (unreachable) malloc'd blocks.      ---*/
   5502 /*------------------------------------------------------------*/
   5503 
   5504 /* For the memory leak detector, say whether an entire 64k chunk of
   5505    address space is possibly in use, or not.  If in doubt return
   5506    True.
   5507 */
   5508 Bool MC_(is_within_valid_secondary) ( Addr a )
   5509 {
   5510    SecMap* sm = maybe_get_secmap_for ( a );
   5511    if (sm == NULL || sm == &sm_distinguished[SM_DIST_NOACCESS]) {
   5512       /* Definitely not in use. */
   5513       return False;
   5514    } else {
   5515       return True;
   5516    }
   5517 }
   5518 
   5519 
   5520 /* For the memory leak detector, say whether or not a given word
   5521    address is to be regarded as valid. */
   5522 Bool MC_(is_valid_aligned_word) ( Addr a )
   5523 {
   5524    tl_assert(sizeof(UWord) == 4 || sizeof(UWord) == 8);
   5525    tl_assert(VG_IS_WORD_ALIGNED(a));
   5526    if (get_vabits8_for_aligned_word32 (a) != VA_BITS8_DEFINED)
   5527       return False;
   5528    if (sizeof(UWord) == 8) {
   5529       if (get_vabits8_for_aligned_word32 (a + 4) != VA_BITS8_DEFINED)
   5530          return False;
   5531    }
   5532    if (UNLIKELY(MC_(in_ignored_range)(a)))
   5533       return False;
   5534    else
   5535       return True;
   5536 }
   5537 
   5538 
   5539 /*------------------------------------------------------------*/
   5540 /*--- Initialisation                                       ---*/
   5541 /*------------------------------------------------------------*/
   5542 
   5543 static void init_shadow_memory ( void )
   5544 {
   5545    Int     i;
   5546    SecMap* sm;
   5547 
   5548    tl_assert(V_BIT_UNDEFINED   == 1);
   5549    tl_assert(V_BIT_DEFINED     == 0);
   5550    tl_assert(V_BITS8_UNDEFINED == 0xFF);
   5551    tl_assert(V_BITS8_DEFINED   == 0);
   5552 
   5553    /* Build the 3 distinguished secondaries */
   5554    sm = &sm_distinguished[SM_DIST_NOACCESS];
   5555    for (i = 0; i < SM_CHUNKS; i++) sm->vabits8[i] = VA_BITS8_NOACCESS;
   5556 
   5557    sm = &sm_distinguished[SM_DIST_UNDEFINED];
   5558    for (i = 0; i < SM_CHUNKS; i++) sm->vabits8[i] = VA_BITS8_UNDEFINED;
   5559 
   5560    sm = &sm_distinguished[SM_DIST_DEFINED];
   5561    for (i = 0; i < SM_CHUNKS; i++) sm->vabits8[i] = VA_BITS8_DEFINED;
   5562 
   5563    /* Set up the primary map. */
   5564    /* These entries gradually get overwritten as the used address
   5565       space expands. */
   5566    for (i = 0; i < N_PRIMARY_MAP; i++)
   5567       primary_map[i] = &sm_distinguished[SM_DIST_NOACCESS];
   5568 
   5569    /* Auxiliary primary maps */
   5570    init_auxmap_L1_L2();
   5571 
   5572    /* auxmap_size = auxmap_used = 0;
   5573       no ... these are statically initialised */
   5574 
   5575    /* Secondary V bit table */
   5576    secVBitTable = createSecVBitTable();
   5577 }
   5578 
   5579 
   5580 /*------------------------------------------------------------*/
   5581 /*--- Sanity check machinery (permanently engaged)         ---*/
   5582 /*------------------------------------------------------------*/
   5583 
   5584 static Bool mc_cheap_sanity_check ( void )
   5585 {
   5586    n_sanity_cheap++;
   5587    PROF_EVENT(MCPE_CHEAP_SANITY_CHECK);
   5588    /* Check for sane operating level */
   5589    if (MC_(clo_mc_level) < 1 || MC_(clo_mc_level) > 3)
   5590       return False;
   5591    /* nothing else useful we can rapidly check */
   5592    return True;
   5593 }
   5594 
   5595 static Bool mc_expensive_sanity_check ( void )
   5596 {
   5597    Int     i;
   5598    Word    n_secmaps_found;
   5599    SecMap* sm;
   5600    const HChar*  errmsg;
   5601    Bool    bad = False;
   5602 
   5603    if (0) VG_(printf)("expensive sanity check\n");
   5604    if (0) return True;
   5605 
   5606    n_sanity_expensive++;
   5607    PROF_EVENT(MCPE_EXPENSIVE_SANITY_CHECK);
   5608 
   5609    /* Check for sane operating level */
   5610    if (MC_(clo_mc_level) < 1 || MC_(clo_mc_level) > 3)
   5611       return False;
   5612 
   5613    /* Check that the 3 distinguished SMs are still as they should be. */
   5614 
   5615    /* Check noaccess DSM. */
   5616    sm = &sm_distinguished[SM_DIST_NOACCESS];
   5617    for (i = 0; i < SM_CHUNKS; i++)
   5618       if (sm->vabits8[i] != VA_BITS8_NOACCESS)
   5619          bad = True;
   5620 
   5621    /* Check undefined DSM. */
   5622    sm = &sm_distinguished[SM_DIST_UNDEFINED];
   5623    for (i = 0; i < SM_CHUNKS; i++)
   5624       if (sm->vabits8[i] != VA_BITS8_UNDEFINED)
   5625          bad = True;
   5626 
   5627    /* Check defined DSM. */
   5628    sm = &sm_distinguished[SM_DIST_DEFINED];
   5629    for (i = 0; i < SM_CHUNKS; i++)
   5630       if (sm->vabits8[i] != VA_BITS8_DEFINED)
   5631          bad = True;
   5632 
   5633    if (bad) {
   5634       VG_(printf)("memcheck expensive sanity: "
   5635                   "distinguished_secondaries have changed\n");
   5636       return False;
   5637    }
   5638 
   5639    /* If we're not checking for undefined value errors, the secondary V bit
   5640     * table should be empty. */
   5641    if (MC_(clo_mc_level) == 1) {
   5642       if (0 != VG_(OSetGen_Size)(secVBitTable))
   5643          return False;
   5644    }
   5645 
   5646    /* check the auxiliary maps, very thoroughly */
   5647    n_secmaps_found = 0;
   5648    errmsg = check_auxmap_L1_L2_sanity( &n_secmaps_found );
   5649    if (errmsg) {
   5650       VG_(printf)("memcheck expensive sanity, auxmaps:\n\t%s", errmsg);
   5651       return False;
   5652    }
   5653 
   5654    /* n_secmaps_found is now the number referred to by the auxiliary
   5655       primary map.  Now add on the ones referred to by the main
   5656       primary map. */
   5657    for (i = 0; i < N_PRIMARY_MAP; i++) {
   5658       if (primary_map[i] == NULL) {
   5659          bad = True;
   5660       } else {
   5661          if (!is_distinguished_sm(primary_map[i]))
   5662             n_secmaps_found++;
   5663       }
   5664    }
   5665 
   5666    /* check that the number of secmaps issued matches the number that
   5667       are reachable (iow, no secmap leaks) */
   5668    if (n_secmaps_found != (n_issued_SMs - n_deissued_SMs))
   5669       bad = True;
   5670 
   5671    if (bad) {
   5672       VG_(printf)("memcheck expensive sanity: "
   5673                   "apparent secmap leakage\n");
   5674       return False;
   5675    }
   5676 
   5677    if (bad) {
   5678       VG_(printf)("memcheck expensive sanity: "
   5679                   "auxmap covers wrong address space\n");
   5680       return False;
   5681    }
   5682 
   5683    /* there is only one pointer to each secmap (expensive) */
   5684 
   5685    return True;
   5686 }
   5687 
   5688 /*------------------------------------------------------------*/
   5689 /*--- Command line args                                    ---*/
   5690 /*------------------------------------------------------------*/
   5691 
   5692 /* 31 Aug 2015: Vectorised code is now so widespread that
   5693    --partial-loads-ok needs to be enabled by default on all platforms.
   5694    Not doing so causes lots of false errors. */
   5695 Bool          MC_(clo_partial_loads_ok)       = True;
   5696 Long          MC_(clo_freelist_vol)           = 20*1000*1000LL;
   5697 Long          MC_(clo_freelist_big_blocks)    =  1*1000*1000LL;
   5698 LeakCheckMode MC_(clo_leak_check)             = LC_Summary;
   5699 VgRes         MC_(clo_leak_resolution)        = Vg_HighRes;
   5700 UInt          MC_(clo_show_leak_kinds)        = R2S(Possible) | R2S(Unreached);
   5701 UInt          MC_(clo_error_for_leak_kinds)   = R2S(Possible) | R2S(Unreached);
   5702 UInt          MC_(clo_leak_check_heuristics)  =   H2S(LchStdString)
   5703                                                 | H2S( LchLength64)
   5704                                                 | H2S( LchNewArray)
   5705                                                 | H2S( LchMultipleInheritance);
   5706 Bool          MC_(clo_workaround_gcc296_bugs) = False;
   5707 Int           MC_(clo_malloc_fill)            = -1;
   5708 Int           MC_(clo_free_fill)              = -1;
   5709 KeepStacktraces MC_(clo_keep_stacktraces)     = KS_alloc_and_free;
   5710 Int           MC_(clo_mc_level)               = 2;
   5711 Bool          MC_(clo_show_mismatched_frees)  = True;
   5712 Bool          MC_(clo_expensive_definedness_checks) = False;
   5713 
   5714 static const HChar * MC_(parse_leak_heuristics_tokens) =
   5715    "-,stdstring,length64,newarray,multipleinheritance";
   5716 /* The first heuristic value (LchNone) has no keyword, as this is
   5717    a fake heuristic used to collect the blocks found without any
   5718    heuristic. */
   5719 
   5720 static Bool mc_process_cmd_line_options(const HChar* arg)
   5721 {
   5722    const HChar* tmp_str;
   5723    Int   tmp_show;
   5724 
   5725    tl_assert( MC_(clo_mc_level) >= 1 && MC_(clo_mc_level) <= 3 );
   5726 
   5727    /* Set MC_(clo_mc_level):
   5728          1 = A bit tracking only
   5729          2 = A and V bit tracking, but no V bit origins
   5730          3 = A and V bit tracking, and V bit origins
   5731 
   5732       Do this by inspecting --undef-value-errors= and
   5733       --track-origins=.  Reject the case --undef-value-errors=no
   5734       --track-origins=yes as meaningless.
   5735    */
   5736    if (0 == VG_(strcmp)(arg, "--undef-value-errors=no")) {
   5737       if (MC_(clo_mc_level) == 3) {
   5738          goto bad_level;
   5739       } else {
   5740          MC_(clo_mc_level) = 1;
   5741          return True;
   5742       }
   5743    }
   5744    if (0 == VG_(strcmp)(arg, "--undef-value-errors=yes")) {
   5745       if (MC_(clo_mc_level) == 1)
   5746          MC_(clo_mc_level) = 2;
   5747       return True;
   5748    }
   5749    if (0 == VG_(strcmp)(arg, "--track-origins=no")) {
   5750       if (MC_(clo_mc_level) == 3)
   5751          MC_(clo_mc_level) = 2;
   5752       return True;
   5753    }
   5754    if (0 == VG_(strcmp)(arg, "--track-origins=yes")) {
   5755       if (MC_(clo_mc_level) == 1) {
   5756          goto bad_level;
   5757       } else {
   5758          MC_(clo_mc_level) = 3;
   5759          return True;
   5760       }
   5761    }
   5762 
   5763         if VG_BOOL_CLO(arg, "--partial-loads-ok", MC_(clo_partial_loads_ok)) {}
   5764    else if VG_USET_CLO(arg, "--errors-for-leak-kinds",
   5765                        MC_(parse_leak_kinds_tokens),
   5766                        MC_(clo_error_for_leak_kinds)) {}
   5767    else if VG_USET_CLO(arg, "--show-leak-kinds",
   5768                        MC_(parse_leak_kinds_tokens),
   5769                        MC_(clo_show_leak_kinds)) {}
   5770    else if VG_USET_CLO(arg, "--leak-check-heuristics",
   5771                        MC_(parse_leak_heuristics_tokens),
   5772                        MC_(clo_leak_check_heuristics)) {}
   5773    else if (VG_BOOL_CLO(arg, "--show-reachable", tmp_show)) {
   5774       if (tmp_show) {
   5775          MC_(clo_show_leak_kinds) = MC_(all_Reachedness)();
   5776       } else {
   5777          MC_(clo_show_leak_kinds) &= ~R2S(Reachable);
   5778       }
   5779    }
   5780    else if VG_BOOL_CLO(arg, "--show-possibly-lost", tmp_show) {
   5781       if (tmp_show) {
   5782          MC_(clo_show_leak_kinds) |= R2S(Possible);
   5783       } else {
   5784          MC_(clo_show_leak_kinds) &= ~R2S(Possible);
   5785       }
   5786    }
   5787    else if VG_BOOL_CLO(arg, "--workaround-gcc296-bugs",
   5788                                             MC_(clo_workaround_gcc296_bugs)) {}
   5789 
   5790    else if VG_BINT_CLO(arg, "--freelist-vol",  MC_(clo_freelist_vol),
   5791                                                0, 10*1000*1000*1000LL) {}
   5792 
   5793    else if VG_BINT_CLO(arg, "--freelist-big-blocks",
   5794                        MC_(clo_freelist_big_blocks),
   5795                        0, 10*1000*1000*1000LL) {}
   5796 
   5797    else if VG_XACT_CLO(arg, "--leak-check=no",
   5798                             MC_(clo_leak_check), LC_Off) {}
   5799    else if VG_XACT_CLO(arg, "--leak-check=summary",
   5800                             MC_(clo_leak_check), LC_Summary) {}
   5801    else if VG_XACT_CLO(arg, "--leak-check=yes",
   5802                             MC_(clo_leak_check), LC_Full) {}
   5803    else if VG_XACT_CLO(arg, "--leak-check=full",
   5804                             MC_(clo_leak_check), LC_Full) {}
   5805 
   5806    else if VG_XACT_CLO(arg, "--leak-resolution=low",
   5807                             MC_(clo_leak_resolution), Vg_LowRes) {}
   5808    else if VG_XACT_CLO(arg, "--leak-resolution=med",
   5809                             MC_(clo_leak_resolution), Vg_MedRes) {}
   5810    else if VG_XACT_CLO(arg, "--leak-resolution=high",
   5811                             MC_(clo_leak_resolution), Vg_HighRes) {}
   5812 
   5813    else if VG_STR_CLO(arg, "--ignore-ranges", tmp_str) {
   5814       Bool ok = parse_ignore_ranges(tmp_str);
   5815       if (!ok) {
   5816          VG_(message)(Vg_DebugMsg,
   5817             "ERROR: --ignore-ranges: "
   5818             "invalid syntax, or end <= start in range\n");
   5819          return False;
   5820       }
   5821       if (gIgnoredAddressRanges) {
   5822          UInt i;
   5823          for (i = 0; i < VG_(sizeRangeMap)(gIgnoredAddressRanges); i++) {
   5824             UWord val     = IAR_INVALID;
   5825             UWord key_min = ~(UWord)0;
   5826             UWord key_max = (UWord)0;
   5827             VG_(indexRangeMap)( &key_min, &key_max, &val,
   5828                                 gIgnoredAddressRanges, i );
   5829             tl_assert(key_min <= key_max);
   5830             UWord limit = 0x4000000; /* 64M - entirely arbitrary limit */
   5831             if (key_max - key_min > limit && val == IAR_CommandLine) {
   5832                VG_(message)(Vg_DebugMsg,
   5833                   "ERROR: --ignore-ranges: suspiciously large range:\n");
   5834                VG_(message)(Vg_DebugMsg,
   5835                    "       0x%lx-0x%lx (size %lu)\n", key_min, key_max,
   5836                    key_max - key_min + 1);
   5837                return False;
   5838             }
   5839          }
   5840       }
   5841    }
   5842 
   5843    else if VG_BHEX_CLO(arg, "--malloc-fill", MC_(clo_malloc_fill), 0x00,0xFF) {}
   5844    else if VG_BHEX_CLO(arg, "--free-fill",   MC_(clo_free_fill),   0x00,0xFF) {}
   5845 
   5846    else if VG_XACT_CLO(arg, "--keep-stacktraces=alloc",
   5847                        MC_(clo_keep_stacktraces), KS_alloc) {}
   5848    else if VG_XACT_CLO(arg, "--keep-stacktraces=free",
   5849                        MC_(clo_keep_stacktraces), KS_free) {}
   5850    else if VG_XACT_CLO(arg, "--keep-stacktraces=alloc-and-free",
   5851                        MC_(clo_keep_stacktraces), KS_alloc_and_free) {}
   5852    else if VG_XACT_CLO(arg, "--keep-stacktraces=alloc-then-free",
   5853                        MC_(clo_keep_stacktraces), KS_alloc_then_free) {}
   5854    else if VG_XACT_CLO(arg, "--keep-stacktraces=none",
   5855                        MC_(clo_keep_stacktraces), KS_none) {}
   5856 
   5857    else if VG_BOOL_CLO(arg, "--show-mismatched-frees",
   5858                        MC_(clo_show_mismatched_frees)) {}
   5859    else if VG_BOOL_CLO(arg, "--expensive-definedness-checks",
   5860                        MC_(clo_expensive_definedness_checks)) {}
   5861 
   5862    else
   5863       return VG_(replacement_malloc_process_cmd_line_option)(arg);
   5864 
   5865    return True;
   5866 
   5867 
   5868   bad_level:
   5869    VG_(fmsg_bad_option)(arg,
   5870       "--track-origins=yes has no effect when --undef-value-errors=no.\n");
   5871 }
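         /* Editor's summary of the level selection above (illustrative): the
            resulting MC_(clo_mc_level) for the four flag combinations is

               --undef-value-errors=yes --track-origins=no   -> 2  (default)
               --undef-value-errors=yes --track-origins=yes  -> 3
               --undef-value-errors=no  --track-origins=no   -> 1
               --undef-value-errors=no  --track-origins=yes  -> rejected via
                                                                bad_level

            Whichever of the two conflicting options appears second is the
            one on which the rejection is reported. */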
   5872 
   5873 static void mc_print_usage(void)
   5874 {
   5875    VG_(printf)(
   5876 "    --leak-check=no|summary|full     search for memory leaks at exit?  [summary]\n"
   5877 "    --leak-resolution=low|med|high   differentiation of leak stack traces [high]\n"
   5878 "    --show-leak-kinds=kind1,kind2,.. which leak kinds to show?\n"
   5879 "                                            [definite,possible]\n"
   5880 "    --errors-for-leak-kinds=kind1,kind2,..  which leak kinds are errors?\n"
   5881 "                                            [definite,possible]\n"
   5882 "        where kind is one of:\n"
   5883 "          definite indirect possible reachable all none\n"
   5884 "    --leak-check-heuristics=heur1,heur2,... which heuristics to use for\n"
    5885 "        reducing leak search false positives [all]\n"
   5886 "        where heur is one of:\n"
   5887 "          stdstring length64 newarray multipleinheritance all none\n"
   5888 "    --show-reachable=yes             same as --show-leak-kinds=all\n"
   5889 "    --show-reachable=no --show-possibly-lost=yes\n"
   5890 "                                     same as --show-leak-kinds=definite,possible\n"
   5891 "    --show-reachable=no --show-possibly-lost=no\n"
   5892 "                                     same as --show-leak-kinds=definite\n"
   5893 "    --undef-value-errors=no|yes      check for undefined value errors [yes]\n"
   5894 "    --track-origins=no|yes           show origins of undefined values? [no]\n"
   5895 "    --partial-loads-ok=no|yes        too hard to explain here; see manual [yes]\n"
   5896 "    --expensive-definedness-checks=no|yes\n"
   5897 "                                     Use extra-precise definedness tracking [no]\n"
   5898 "    --freelist-vol=<number>          volume of freed blocks queue     [20000000]\n"
    5899 "    --freelist-big-blocks=<number>   releases blocks with size >= this first [1000000]\n"
    5900 "    --workaround-gcc296-bugs=no|yes  self-explanatory [no]\n"
   5901 "    --ignore-ranges=0xPP-0xQQ[,0xRR-0xSS]   assume given addresses are OK\n"
   5902 "    --malloc-fill=<hexnumber>        fill malloc'd areas with given value\n"
   5903 "    --free-fill=<hexnumber>          fill free'd areas with given value\n"
   5904 "    --keep-stacktraces=alloc|free|alloc-and-free|alloc-then-free|none\n"
   5905 "        stack trace(s) to keep for malloc'd/free'd areas       [alloc-and-free]\n"
   5906 "    --show-mismatched-frees=no|yes   show frees that don't match the allocator? [yes]\n"
   5907    );
   5908 }
   5909 
   5910 static void mc_print_debug_usage(void)
   5911 {
   5912    VG_(printf)(
   5913 "    (none)\n"
   5914    );
   5915 }
   5916 
   5917 
   5918 /*------------------------------------------------------------*/
   5919 /*--- Client blocks                                        ---*/
   5920 /*------------------------------------------------------------*/
   5921 
   5922 /* Client block management:
   5923 
   5924    This is managed as an expanding array of client block descriptors.
   5925    Indices of live descriptors are issued to the client, so it can ask
   5926    to free them later.  Therefore we cannot slide live entries down
   5927    over dead ones.  Instead we must use free/inuse flags and scan for
   5928    an empty slot at allocation time.  This in turn means allocation is
   5929    relatively expensive, so we hope this does not happen too often.
   5930 
   5931    An unused block has start == size == 0
   5932 */
   5933 
   5934 /* type CGenBlock is defined in mc_include.h */
   5935 
   5936 /* This subsystem is self-initialising. */
   5937 static UWord      cgb_size = 0;
   5938 static UWord      cgb_used = 0;
   5939 static CGenBlock* cgbs     = NULL;
   5940 
   5941 /* Stats for this subsystem. */
   5942 static ULong cgb_used_MAX = 0;   /* Max in use. */
   5943 static ULong cgb_allocs   = 0;   /* Number of allocs. */
   5944 static ULong cgb_discards = 0;   /* Number of discards. */
   5945 static ULong cgb_search   = 0;   /* Number of searches. */
   5946 
   5947 
   5948 /* Get access to the client block array. */
   5949 void MC_(get_ClientBlock_array)( /*OUT*/CGenBlock** blocks,
   5950                                  /*OUT*/UWord* nBlocks )
   5951 {
   5952    *blocks  = cgbs;
   5953    *nBlocks = cgb_used;
   5954 }
   5955 
   5956 
   5957 static
   5958 Int alloc_client_block ( void )
   5959 {
   5960    UWord      i, sz_new;
   5961    CGenBlock* cgbs_new;
   5962 
   5963    cgb_allocs++;
   5964 
   5965    for (i = 0; i < cgb_used; i++) {
   5966       cgb_search++;
   5967       if (cgbs[i].start == 0 && cgbs[i].size == 0)
   5968          return i;
   5969    }
   5970 
   5971    /* Not found.  Try to allocate one at the end. */
   5972    if (cgb_used < cgb_size) {
   5973       cgb_used++;
   5974       return cgb_used-1;
   5975    }
   5976 
   5977    /* Ok, we have to allocate a new one. */
   5978    tl_assert(cgb_used == cgb_size);
   5979    sz_new = (cgbs == NULL) ? 10 : (2 * cgb_size);
   5980 
   5981    cgbs_new = VG_(malloc)( "mc.acb.1", sz_new * sizeof(CGenBlock) );
   5982    for (i = 0; i < cgb_used; i++)
   5983       cgbs_new[i] = cgbs[i];
   5984 
   5985    if (cgbs != NULL)
   5986       VG_(free)( cgbs );
   5987    cgbs = cgbs_new;
   5988 
   5989    cgb_size = sz_new;
   5990    cgb_used++;
   5991    if (cgb_used > cgb_used_MAX)
   5992       cgb_used_MAX = cgb_used;
   5993    return cgb_used-1;
   5994 }
   5995 
   5996 
   5997 static void show_client_block_stats ( void )
   5998 {
   5999    VG_(message)(Vg_DebugMsg,
   6000       "general CBs: %llu allocs, %llu discards, %llu maxinuse, %llu search\n",
   6001       cgb_allocs, cgb_discards, cgb_used_MAX, cgb_search
   6002    );
   6003 }
   6004 static void print_monitor_help ( void )
   6005 {
   6006    VG_(gdb_printf)
   6007       (
   6008 "\n"
   6009 "memcheck monitor commands:\n"
   6010 "  xb <addr> [<len>]\n"
   6011 "        prints validity bits for <len> (or 1) bytes at <addr>\n"
   6012 "            bit values 0 = valid, 1 = invalid, __ = unaddressable byte\n"
    6013 "        Then prints the byte values below the corresponding validity bits\n"
   6014 "        in a layout similar to the gdb command 'x /<len>xb <addr>'\n"
   6015 "        Example: xb 0x8049c78 10\n"
   6016 "  get_vbits <addr> [<len>]\n"
    6017 "        Similar to xb, but only prints the validity bytes, in groups of 4.\n"
   6018 "  make_memory [noaccess|undefined\n"
   6019 "                     |defined|Definedifaddressable] <addr> [<len>]\n"
   6020 "        mark <len> (or 1) bytes at <addr> with the given accessibility\n"
   6021 "  check_memory [addressable|defined] <addr> [<len>]\n"
   6022 "        check that <len> (or 1) bytes at <addr> have the given accessibility\n"
   6023 "            and outputs a description of <addr>\n"
   6024 "  leak_check [full*|summary]\n"
   6025 "                [kinds kind1,kind2,...|reachable|possibleleak*|definiteleak]\n"
   6026 "                [heuristics heur1,heur2,...]\n"
   6027 "                [increased*|changed|any]\n"
   6028 "                [unlimited*|limited <max_loss_records_output>]\n"
   6029 "            * = defaults\n"
   6030 "       where kind is one of:\n"
   6031 "         definite indirect possible reachable all none\n"
   6032 "       where heur is one of:\n"
   6033 "         stdstring length64 newarray multipleinheritance all none*\n"
   6034 "       Examples: leak_check\n"
   6035 "                 leak_check summary any\n"
   6036 "                 leak_check full kinds indirect,possible\n"
   6037 "                 leak_check full reachable any limited 100\n"
   6038 "  block_list <loss_record_nr>|<loss_record_nr_from>..<loss_record_nr_to>\n"
   6039 "                [unlimited*|limited <max_blocks>]\n"
   6040 "                [heuristics heur1,heur2,...]\n"
   6041 "        after a leak search, shows the list of blocks of <loss_record_nr>\n"
   6042 "        (or of the range <loss_record_nr_from>..<loss_record_nr_to>).\n"
   6043 "        With heuristics, only shows the blocks found via heur1,heur2,...\n"
   6044 "            * = defaults\n"
   6045 "  who_points_at <addr> [<len>]\n"
   6046 "        shows places pointing inside <len> (default 1) bytes at <addr>\n"
   6047 "        (with len 1, only shows \"start pointers\" pointing exactly to <addr>,\n"
   6048 "         with len > 1, will also show \"interior pointers\")\n"
   6049 "\n");
   6050 }
   6051 
   6052 /* Print szB bytes at address, with a format similar to the gdb command
   6053    x /<szB>xb address.
   6054    res[i] == 1 indicates the corresponding byte is addressable. */
   6055 static void gdb_xb (Addr address, SizeT szB, Int res[])
   6056 {
   6057    UInt i;
   6058 
   6059    for (i = 0; i < szB; i++) {
   6060       UInt bnr = i % 8;
   6061       if (bnr == 0) {
   6062          if (i != 0)
   6063             VG_(printf) ("\n"); // Terminate previous line
   6064          VG_(printf) ("%p:", (void*)(address+i));
   6065       }
   6066       if (res[i] == 1)
   6067          VG_(printf) ("\t0x%02x", *(UChar*)(address+i));
   6068       else
   6069          VG_(printf) ("\t0x??");
   6070    }
   6071    VG_(printf) ("\n"); // Terminate previous line
   6072 }
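         /* Editor's illustrative example of the layout produced above, for
            szB == 10 starting at the address used in the monitor help text,
            with the 9th byte unaddressable (byte values are made up):

               0x8049c78:  0x00  0x11  0x22  0x33  0x44  0x55  0x66  0x77
               0x8049c80:  0x??  0x99

            In the real output the columns are tab-separated. */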
   6073 
   6074 
    6075 /* Returns the address of the next non-space character,
    6076    or the address of the string terminator. */
   6077 static HChar* next_non_space (HChar *s)
   6078 {
   6079    while (*s && *s == ' ')
   6080       s++;
   6081    return s;
   6082 }
   6083 
    6084 /* Parse an integer slice, i.e. a single integer or a range of integers.
   6085    Syntax is:
   6086        <integer>[..<integer> ]
   6087    (spaces are allowed before and/or after ..).
    6088    Return True if the slice was correctly parsed, False otherwise. */
   6089 static Bool VG_(parse_slice) (HChar* s, HChar** saveptr,
   6090                               UInt *from, UInt *to)
   6091 {
   6092    HChar* wl;
   6093    HChar *endptr;
    6094    endptr = NULL;
   6095    wl = VG_(strtok_r) (s, " ", saveptr);
   6096 
   6097    /* slice must start with an integer. */
   6098    if (wl == NULL) {
   6099       VG_(gdb_printf) ("expecting integer or slice <from>..<to>\n");
   6100       return False;
   6101    }
   6102    *from = VG_(strtoull10) (wl, &endptr);
   6103    if (endptr == wl) {
   6104       VG_(gdb_printf) ("invalid integer or slice <from>..<to>\n");
   6105       return False;
   6106    }
   6107 
   6108    if (*endptr == '\0' && *next_non_space(*saveptr) != '.') {
   6109       /* wl token is an integer terminating the string
   6110          or else next token does not start with .
   6111          In both cases, the slice is a single integer. */
   6112       *to = *from;
   6113       return True;
   6114    }
   6115 
   6116    if (*endptr == '\0') {
   6117       // iii ..    => get the next token
   6118       wl =  VG_(strtok_r) (NULL, " .", saveptr);
   6119    } else {
   6120       // It must be iii..
    6121       if (*endptr != '.' || *(endptr+1) != '.') {
   6122          VG_(gdb_printf) ("expecting slice <from>..<to>\n");
   6123          return False;
   6124       }
   6125       if ( *(endptr+2) == ' ') {
   6126          // It must be iii.. jjj  => get the next token
   6127          wl =  VG_(strtok_r) (NULL, " .", saveptr);
   6128       } else {
   6129          // It must be iii..jjj
   6130          wl = endptr+2;
   6131       }
   6132    }
   6133 
   6134    *to = VG_(strtoull10) (wl, &endptr);
   6135    if (*endptr != '\0') {
   6136       VG_(gdb_printf) ("missing/wrong 'to' of slice <from>..<to>\n");
   6137       return False;
   6138    }
   6139 
   6140    if (*from > *to) {
   6141       VG_(gdb_printf) ("<from> cannot be bigger than <to> "
   6142                        "in slice <from>..<to>\n");
   6143       return False;
   6144    }
   6145 
   6146    return True;
   6147 }
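         /* Editor's illustrative examples for the parser above (values chosen
            arbitrarily): "7" yields from = to = 7; "7..10" and "7 .. 10" both
            yield from = 7, to = 10; "10..7" is rejected because <from> must
            not exceed <to>; and a non-numeric <to> (e.g. "7..x") triggers the
            "missing/wrong 'to'" message. */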
   6148 
   6149 /* return True if request recognised, False otherwise */
   6150 static Bool handle_gdb_monitor_command (ThreadId tid, HChar *req)
   6151 {
   6152    HChar* wcmd;
   6153    HChar s[VG_(strlen(req)) + 1]; /* copy for strtok_r */
   6154    HChar *ssaveptr;
   6155 
   6156    VG_(strcpy) (s, req);
   6157 
   6158    wcmd = VG_(strtok_r) (s, " ", &ssaveptr);
   6159    /* NB: if possible, avoid introducing a new command below which
   6160       starts with the same first letter(s) as an already existing
   6161       command. This ensures a shorter abbreviation for the user. */
   6162    switch (VG_(keyword_id)
   6163            ("help get_vbits leak_check make_memory check_memory "
   6164             "block_list who_points_at xb",
   6165             wcmd, kwd_report_duplicated_matches)) {
   6166    case -2: /* multiple matches */
   6167       return True;
   6168    case -1: /* not found */
   6169       return False;
   6170    case  0: /* help */
   6171       print_monitor_help();
   6172       return True;
   6173    case  1: { /* get_vbits */
   6174       Addr address;
   6175       SizeT szB = 1;
   6176       if (VG_(strtok_get_address_and_size) (&address, &szB, &ssaveptr)) {
   6177          UChar vbits;
   6178          Int i;
   6179          Int unaddressable = 0;
   6180          for (i = 0; i < szB; i++) {
   6181             Int res = mc_get_or_set_vbits_for_client
   6182                (address+i, (Addr) &vbits, 1,
   6183                 False, /* get them */
   6184                 False  /* is client request */ );
   6185             /* we are before the first character on next line, print a \n. */
   6186             if ((i % 32) == 0 && i != 0)
   6187                VG_(printf) ("\n");
   6188             /* we are before the next block of 4 starts, print a space. */
   6189             else if ((i % 4) == 0 && i != 0)
   6190                VG_(printf) (" ");
   6191             if (res == 1) {
   6192                VG_(printf) ("%02x", vbits);
   6193             } else {
   6194                tl_assert(3 == res);
   6195                unaddressable++;
   6196                VG_(printf) ("__");
   6197             }
   6198          }
   6199          VG_(printf) ("\n");
   6200          if (unaddressable) {
   6201             VG_(printf)
   6202                ("Address %p len %lu has %d bytes unaddressable\n",
   6203                 (void *)address, szB, unaddressable);
   6204          }
   6205       }
   6206       return True;
   6207    }
   6208    case  2: { /* leak_check */
   6209       Int err = 0;
   6210       LeakCheckParams lcp;
   6211       HChar* kw;
   6212 
   6213       lcp.mode               = LC_Full;
   6214       lcp.show_leak_kinds    = R2S(Possible) | R2S(Unreached);
   6215       lcp.errors_for_leak_kinds = 0; // no errors for interactive leak search.
   6216       lcp.heuristics         = 0;
   6217       lcp.deltamode          = LCD_Increased;
   6218       lcp.max_loss_records_output = 999999999;
   6219       lcp.requested_by_monitor_command = True;
   6220 
   6221       for (kw = VG_(strtok_r) (NULL, " ", &ssaveptr);
   6222            kw != NULL;
   6223            kw = VG_(strtok_r) (NULL, " ", &ssaveptr)) {
   6224          switch (VG_(keyword_id)
   6225                  ("full summary "
   6226                   "kinds reachable possibleleak definiteleak "
   6227                   "heuristics "
   6228                   "increased changed any "
   6229                   "unlimited limited ",
   6230                   kw, kwd_report_all)) {
   6231          case -2: err++; break;
   6232          case -1: err++; break;
   6233          case  0: /* full */
   6234             lcp.mode = LC_Full; break;
   6235          case  1: /* summary */
   6236             lcp.mode = LC_Summary; break;
   6237          case  2: { /* kinds */
   6238             wcmd = VG_(strtok_r) (NULL, " ", &ssaveptr);
   6239             if (wcmd == NULL
   6240                 || !VG_(parse_enum_set)(MC_(parse_leak_kinds_tokens),
   6241                                         True/*allow_all*/,
   6242                                         wcmd,
   6243                                         &lcp.show_leak_kinds)) {
   6244                VG_(gdb_printf) ("missing or malformed leak kinds set\n");
   6245                err++;
   6246             }
   6247             break;
   6248          }
   6249          case  3: /* reachable */
   6250             lcp.show_leak_kinds = MC_(all_Reachedness)();
   6251             break;
   6252          case  4: /* possibleleak */
   6253             lcp.show_leak_kinds
   6254                = R2S(Possible) | R2S(IndirectLeak) | R2S(Unreached);
   6255             break;
   6256          case  5: /* definiteleak */
   6257             lcp.show_leak_kinds = R2S(Unreached);
   6258             break;
   6259          case  6: { /* heuristics */
   6260             wcmd = VG_(strtok_r) (NULL, " ", &ssaveptr);
   6261             if (wcmd == NULL
   6262                 || !VG_(parse_enum_set)(MC_(parse_leak_heuristics_tokens),
   6263                                         True/*allow_all*/,
   6264                                         wcmd,
   6265                                         &lcp.heuristics)) {
   6266                VG_(gdb_printf) ("missing or malformed heuristics set\n");
   6267                err++;
   6268             }
   6269             break;
   6270          }
   6271          case  7: /* increased */
   6272             lcp.deltamode = LCD_Increased; break;
   6273          case  8: /* changed */
   6274             lcp.deltamode = LCD_Changed; break;
   6275          case  9: /* any */
   6276             lcp.deltamode = LCD_Any; break;
   6277          case 10: /* unlimited */
   6278             lcp.max_loss_records_output = 999999999; break;
   6279          case 11: { /* limited */
   6280             Int int_value;
   6281             const HChar* endptr;
   6282 
   6283             wcmd = VG_(strtok_r) (NULL, " ", &ssaveptr);
   6284             if (wcmd == NULL) {
   6285                int_value = 0;
   6286                endptr = "empty"; /* to report an error below */
   6287             } else {
   6288                HChar *the_end;
   6289                int_value = VG_(strtoll10) (wcmd, &the_end);
   6290                endptr = the_end;
   6291             }
   6292             if (*endptr != '\0')
   6293                VG_(gdb_printf) ("missing or malformed integer value\n");
   6294             else if (int_value > 0)
   6295                lcp.max_loss_records_output = (UInt) int_value;
   6296             else
   6297                VG_(gdb_printf) ("max_loss_records_output must be >= 1,"
   6298                                 " got %d\n", int_value);
   6299             break;
   6300          }
   6301          default:
   6302             tl_assert (0);
   6303          }
   6304       }
   6305       if (!err)
   6306          MC_(detect_memory_leaks)(tid, &lcp);
   6307       return True;
   6308    }
   6309 
   6310    case  3: { /* make_memory */
   6311       Addr address;
   6312       SizeT szB = 1;
   6313       Int kwdid = VG_(keyword_id)
   6314          ("noaccess undefined defined Definedifaddressable",
   6315           VG_(strtok_r) (NULL, " ", &ssaveptr), kwd_report_all);
   6316       if (!VG_(strtok_get_address_and_size) (&address, &szB, &ssaveptr))
   6317          return True;
   6318       switch (kwdid) {
   6319       case -2: break;
   6320       case -1: break;
   6321       case  0: MC_(make_mem_noaccess) (address, szB); break;
   6322       case  1: make_mem_undefined_w_tid_and_okind ( address, szB, tid,
   6323                                                     MC_OKIND_USER ); break;
   6324       case  2: MC_(make_mem_defined) ( address, szB ); break;
   6325       case  3: make_mem_defined_if_addressable ( address, szB ); break;
   6326       default: tl_assert(0);
   6327       }
   6328       return True;
   6329    }
   6330 
   6331    case  4: { /* check_memory */
   6332       Addr address;
   6333       SizeT szB = 1;
   6334       Addr bad_addr;
   6335       UInt okind;
   6336       const HChar* src;
   6337       UInt otag;
   6338       UInt ecu;
   6339       ExeContext* origin_ec;
   6340       MC_ReadResult res;
   6341 
   6342       Int kwdid = VG_(keyword_id)
   6343          ("addressable defined",
   6344           VG_(strtok_r) (NULL, " ", &ssaveptr), kwd_report_all);
   6345       if (!VG_(strtok_get_address_and_size) (&address, &szB, &ssaveptr))
   6346          return True;
   6347       switch (kwdid) {
   6348       case -2: break;
   6349       case -1: break;
   6350       case  0: /* addressable */
   6351          if (is_mem_addressable ( address, szB, &bad_addr ))
   6352             VG_(printf) ("Address %p len %lu addressable\n",
   6353                              (void *)address, szB);
   6354          else
   6355             VG_(printf)
   6356                ("Address %p len %lu not addressable:\nbad address %p\n",
   6357                 (void *)address, szB, (void *) bad_addr);
   6358          MC_(pp_describe_addr) (address);
   6359          break;
   6360       case  1: /* defined */
   6361          res = is_mem_defined ( address, szB, &bad_addr, &otag );
   6362          if (MC_AddrErr == res)
   6363             VG_(printf)
   6364                ("Address %p len %lu not addressable:\nbad address %p\n",
   6365                 (void *)address, szB, (void *) bad_addr);
   6366          else if (MC_ValueErr == res) {
   6367             okind = otag & 3;
   6368             switch (okind) {
   6369             case MC_OKIND_STACK:
   6370                src = " was created by a stack allocation"; break;
   6371             case MC_OKIND_HEAP:
   6372                src = " was created by a heap allocation"; break;
   6373             case MC_OKIND_USER:
   6374                src = " was created by a client request"; break;
   6375             case MC_OKIND_UNKNOWN:
   6376                src = ""; break;
   6377             default: tl_assert(0);
   6378             }
   6379             VG_(printf)
   6380                ("Address %p len %lu not defined:\n"
   6381                 "Uninitialised value at %p%s\n",
   6382                 (void *)address, szB, (void *) bad_addr, src);
   6383             ecu = otag & ~3;
   6384             if (VG_(is_plausible_ECU)(ecu)) {
   6385                origin_ec = VG_(get_ExeContext_from_ECU)( ecu );
   6386                VG_(pp_ExeContext)( origin_ec );
   6387             }
   6388          }
   6389          else
   6390             VG_(printf) ("Address %p len %lu defined\n",
   6391                          (void *)address, szB);
   6392          MC_(pp_describe_addr) (address);
   6393          break;
   6394       default: tl_assert(0);
   6395       }
   6396       return True;
   6397    }
   6398 
   6399    case  5: { /* block_list */
   6400       HChar* wl;
   6401       HChar *the_end;
   6402       UInt lr_nr_from = 0;
   6403       UInt lr_nr_to = 0;
   6404 
   6405       if (VG_(parse_slice) (NULL, &ssaveptr, &lr_nr_from, &lr_nr_to)) {
   6406          UInt limit_blocks = 999999999;
   6407          Int int_value;
   6408          UInt heuristics = 0;
   6409 
   6410          for (wl = VG_(strtok_r) (NULL, " ", &ssaveptr);
   6411               wl != NULL;
   6412               wl = VG_(strtok_r) (NULL, " ", &ssaveptr)) {
   6413             switch (VG_(keyword_id) ("unlimited limited heuristics ",
   6414                                      wl,  kwd_report_all)) {
   6415             case -2: return True;
   6416             case -1: return True;
   6417             case  0: /* unlimited */
   6418                limit_blocks = 999999999; break;
   6419             case  1: /* limited */
   6420                wcmd = VG_(strtok_r) (NULL, " ", &ssaveptr);
   6421                if (wcmd == NULL) {
   6422                   VG_(gdb_printf) ("missing integer value\n");
   6423                   return True;
   6424                }
   6425                int_value = VG_(strtoll10) (wcmd, &the_end);
   6426                if (*the_end != '\0') {
   6427                   VG_(gdb_printf) ("malformed integer value\n");
   6428                   return True;
   6429                }
   6430                if (int_value <= 0) {
   6431                   VG_(gdb_printf) ("max_blocks must be >= 1,"
   6432                                    " got %d\n", int_value);
   6433                   return True;
   6434                }
   6435                limit_blocks = (UInt) int_value;
   6436                break;
   6437             case  2: /* heuristics */
   6438                wcmd = VG_(strtok_r) (NULL, " ", &ssaveptr);
   6439                if (wcmd == NULL
   6440                    || !VG_(parse_enum_set)(MC_(parse_leak_heuristics_tokens),
   6441                                            True/*allow_all*/,
   6442                                            wcmd,
   6443                                            &heuristics)) {
   6444                   VG_(gdb_printf) ("missing or malformed heuristics set\n");
   6445                   return True;
   6446                }
   6447                break;
   6448             default:
   6449                tl_assert (0);
   6450             }
   6451          }
   6452          /* Subtract 1 from lr_nr_from/lr_nr_to, as what is shown to the user
   6453             is 1 more than the index in lr_array. */
   6454          if (lr_nr_from == 0 || ! MC_(print_block_list) (lr_nr_from-1,
   6455                                                          lr_nr_to-1,
   6456                                                          limit_blocks,
   6457                                                          heuristics))
   6458             VG_(gdb_printf) ("invalid loss record nr\n");
   6459       }
   6460       return True;
   6461    }
   6462 
   6463    case  6: { /* who_points_at */
   6464       Addr address;
   6465       SizeT szB = 1;
   6466 
   6467       if (!VG_(strtok_get_address_and_size) (&address, &szB, &ssaveptr))
   6468          return True;
   6469       if (address == (Addr) 0) {
   6470          VG_(gdb_printf) ("Cannot search who points at 0x0\n");
   6471          return True;
   6472       }
   6473       MC_(who_points_at) (address, szB);
   6474       return True;
   6475    }
   6476 
   6477    case  7: { /* xb */
   6478       Addr address;
   6479       SizeT szB = 1;
   6480       if (VG_(strtok_get_address_and_size) (&address, &szB, &ssaveptr)) {
   6481          UChar vbits[8];
   6482          Int res[8];
   6483          Int i;
   6484          Int unaddressable = 0;
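                  /* Work through the range 8 bytes at a time: print one line
                     of V bits, then let gdb_xb print the address and data
                     bytes for the same 8-byte group; "__" marks an
                     unaddressable byte. */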
   6485          for (i = 0; i < szB; i++) {
   6486             Int bnr = i % 8;
   6487             res[bnr] = mc_get_or_set_vbits_for_client
   6488                (address+i, (Addr) &vbits[bnr], 1,
   6489                 False, /* get them */
   6490                 False  /* is client request */ );
   6491             /* We are about to print the first V bits of a new line.
   6492                If needed, terminate the previous line by printing the
   6493                address and data bytes of the group just completed. */
   6494             if (bnr == 0) {
   6495                if (i != 0) {
   6496                   VG_(printf) ("\n");
   6497                   gdb_xb (address + i - 8, 8, res);
   6498                }
   6499                VG_(printf) ("\t"); // To align VABITS with gdb_xb layout
   6500             }
   6501             if (res[bnr] == 1) {
   6502                VG_(printf) ("\t  %02x", vbits[bnr]);
   6503             } else {
   6504                tl_assert(3 == res[bnr]);
   6505                unaddressable++;
   6506                VG_(printf) ("\t  __");
   6507             }
   6508          }
   6509          VG_(printf) ("\n");
   6510          if (szB % 8 == 0 && szB > 0)
   6511             gdb_xb (address + szB - 8, 8, res);
   6512          else
   6513             gdb_xb (address + szB - szB % 8, szB % 8, res);
   6514          if (unaddressable) {
   6515             VG_(printf)
   6516                ("Address %p len %lu has %d bytes unaddressable\n",
   6517                 (void *)address, szB, unaddressable);
   6518          }
   6519       }
   6520       return True;
   6521    }
   6522 
   6523    default:
   6524       tl_assert(0);
   6525       return False;
   6526    }
   6527 }
   6528 
   6529 /*------------------------------------------------------------*/
   6530 /*--- Client requests                                      ---*/
   6531 /*------------------------------------------------------------*/
   6532 
   6533 static Bool mc_handle_client_request ( ThreadId tid, UWord* arg, UWord* ret )
   6534 {
   6535    Int   i;
   6536    Addr  bad_addr;
   6537 
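            /* Anything that is neither a MemCheck-specific ('M','C') request
               nor one of the core client requests handled below is not ours;
               tell the core we did not handle it by returning False. */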
   6538    if (!VG_IS_TOOL_USERREQ('M','C',arg[0])
   6539        && VG_USERREQ__MALLOCLIKE_BLOCK != arg[0]
   6540        && VG_USERREQ__RESIZEINPLACE_BLOCK != arg[0]
   6541        && VG_USERREQ__FREELIKE_BLOCK   != arg[0]
   6542        && VG_USERREQ__CREATE_MEMPOOL   != arg[0]
   6543        && VG_USERREQ__DESTROY_MEMPOOL  != arg[0]
   6544        && VG_USERREQ__MEMPOOL_ALLOC    != arg[0]
   6545        && VG_USERREQ__MEMPOOL_FREE     != arg[0]
   6546        && VG_USERREQ__MEMPOOL_TRIM     != arg[0]
   6547        && VG_USERREQ__MOVE_MEMPOOL     != arg[0]
   6548        && VG_USERREQ__MEMPOOL_CHANGE   != arg[0]
   6549        && VG_USERREQ__MEMPOOL_EXISTS   != arg[0]
   6550        && VG_USERREQ__GDB_MONITOR_COMMAND   != arg[0]
   6551        && VG_USERREQ__ENABLE_ADDR_ERROR_REPORTING_IN_RANGE != arg[0]
   6552        && VG_USERREQ__DISABLE_ADDR_ERROR_REPORTING_IN_RANGE != arg[0])
   6553       return False;
   6554 
   6555    switch (arg[0]) {
   6556       case VG_USERREQ__CHECK_MEM_IS_ADDRESSABLE: {
   6557          Bool ok = is_mem_addressable ( arg[1], arg[2], &bad_addr );
   6558          if (!ok)
   6559             MC_(record_user_error) ( tid, bad_addr, /*isAddrErr*/True, 0 );
   6560          *ret = ok ? (UWord)NULL : bad_addr;
   6561          break;
   6562       }
   6563 
   6564       case VG_USERREQ__CHECK_MEM_IS_DEFINED: {
   6565          Bool errorV    = False;
   6566          Addr bad_addrV = 0;
   6567          UInt otagV     = 0;
   6568          Bool errorA    = False;
   6569          Addr bad_addrA = 0;
   6570          is_mem_defined_comprehensive(
   6571             arg[1], arg[2],
   6572             &errorV, &bad_addrV, &otagV, &errorA, &bad_addrA
   6573          );
   6574          if (errorV) {
   6575             MC_(record_user_error) ( tid, bad_addrV,
   6576                                      /*isAddrErr*/False, otagV );
   6577          }
   6578          if (errorA) {
   6579             MC_(record_user_error) ( tid, bad_addrA,
   6580                                      /*isAddrErr*/True, 0 );
   6581          }
   6582          /* Return the lower of the two erring addresses, if any. */
   6583          *ret = 0;
   6584          if (errorV && !errorA) {
   6585             *ret = bad_addrV;
   6586          }
   6587          if (!errorV && errorA) {
   6588             *ret = bad_addrA;
   6589          }
   6590          if (errorV && errorA) {
   6591             *ret = bad_addrV < bad_addrA ? bad_addrV : bad_addrA;
   6592          }
   6593          break;
   6594       }
   6595 
   6596       case VG_USERREQ__DO_LEAK_CHECK: {
   6597          LeakCheckParams lcp;
   6598 
   6599          if (arg[1] == 0)
   6600             lcp.mode = LC_Full;
   6601          else if (arg[1] == 1)
   6602             lcp.mode = LC_Summary;
   6603          else {
   6604             VG_(message)(Vg_UserMsg,
   6605                          "Warning: unknown memcheck leak search mode\n");
   6606             lcp.mode = LC_Full;
   6607          }
   6608 
   6609          lcp.show_leak_kinds = MC_(clo_show_leak_kinds);
   6610          lcp.errors_for_leak_kinds = MC_(clo_error_for_leak_kinds);
   6611          lcp.heuristics = MC_(clo_leak_check_heuristics);
   6612 
   6613          if (arg[2] == 0)
   6614             lcp.deltamode = LCD_Any;
   6615          else if (arg[2] == 1)
   6616             lcp.deltamode = LCD_Increased;
   6617          else if (arg[2] == 2)
   6618             lcp.deltamode = LCD_Changed;
   6619          else {
   6620             VG_(message)
   6621                (Vg_UserMsg,
   6622                 "Warning: unknown memcheck leak search deltamode\n");
   6623             lcp.deltamode = LCD_Any;
   6624          }
   6625          lcp.max_loss_records_output = 999999999;
   6626          lcp.requested_by_monitor_command = False;
   6627 
   6628          MC_(detect_memory_leaks)(tid, &lcp);
   6629          *ret = 0; /* return value is meaningless */
   6630          break;
   6631       }
   6632 
   6633       case VG_USERREQ__MAKE_MEM_NOACCESS:
   6634          MC_(make_mem_noaccess) ( arg[1], arg[2] );
   6635          *ret = -1;
   6636          break;
   6637 
   6638       case VG_USERREQ__MAKE_MEM_UNDEFINED:
   6639          make_mem_undefined_w_tid_and_okind ( arg[1], arg[2], tid,
   6640                                               MC_OKIND_USER );
   6641          *ret = -1;
   6642          break;
   6643 
   6644       case VG_USERREQ__MAKE_MEM_DEFINED:
   6645          MC_(make_mem_defined) ( arg[1], arg[2] );
   6646          *ret = -1;
   6647          break;
   6648 
   6649       case VG_USERREQ__MAKE_MEM_DEFINED_IF_ADDRESSABLE:
   6650          make_mem_defined_if_addressable ( arg[1], arg[2] );
   6651          *ret = -1;
   6652          break;
   6653 
   6654       case VG_USERREQ__CREATE_BLOCK: /* describe a block */
   6655          if (arg[1] != 0 && arg[2] != 0) {
   6656             i = alloc_client_block();
   6657             /* VG_(printf)("allocated %d %p\n", i, cgbs); */
   6658             cgbs[i].start = arg[1];
   6659             cgbs[i].size  = arg[2];
   6660             cgbs[i].desc  = VG_(strdup)("mc.mhcr.1", (HChar *)arg[3]);
   6661             cgbs[i].where = VG_(record_ExeContext) ( tid, 0/*first_ip_delta*/ );
   6662             *ret = i;
   6663          } else
   6664             *ret = -1;
   6665          break;
   6666 
   6667       case VG_USERREQ__DISCARD: /* discard */
   6668          if (cgbs == NULL
   6669              || arg[2] >= cgb_used ||
   6670              (cgbs[arg[2]].start == 0 && cgbs[arg[2]].size == 0)) {
   6671             *ret = 1;
   6672          } else {
   6673             tl_assert(arg[2] >= 0 && arg[2] < cgb_used);
   6674             cgbs[arg[2]].start = cgbs[arg[2]].size = 0;
   6675             VG_(free)(cgbs[arg[2]].desc);
   6676             cgb_discards++;
   6677             *ret = 0;
   6678          }
   6679          break;
   6680 
   6681       case VG_USERREQ__GET_VBITS:
   6682          *ret = mc_get_or_set_vbits_for_client
   6683                    ( arg[1], arg[2], arg[3],
   6684                      False /* get them */,
   6685                      True /* is client request */ );
   6686          break;
   6687 
   6688       case VG_USERREQ__SET_VBITS:
   6689          *ret = mc_get_or_set_vbits_for_client
   6690                    ( arg[1], arg[2], arg[3],
   6691                      True /* set them */,
   6692                      True /* is client request */ );
   6693          break;
   6694 
   6695       case VG_USERREQ__COUNT_LEAKS: { /* count leaked bytes */
   6696          UWord** argp = (UWord**)arg;
   6697          // MC_(bytes_leaked) et al were set by the last leak check (or zero
   6698          // if no prior leak checks performed).
   6699          *argp[1] = MC_(bytes_leaked) + MC_(bytes_indirect);
   6700          *argp[2] = MC_(bytes_dubious);
   6701          *argp[3] = MC_(bytes_reachable);
   6702          *argp[4] = MC_(bytes_suppressed);
   6703          // there is no argp[5]
   6704          //*argp[5] = MC_(bytes_indirect);
   6705          // XXX need to make *argp[1-4] defined;  currently done in the
   6706          // VALGRIND_COUNT_LEAKS_MACRO by initialising them to zero.
   6707          *ret = 0;
   6708          return True;
   6709       }
   6710       case VG_USERREQ__COUNT_LEAK_BLOCKS: { /* count leaked blocks */
   6711          UWord** argp = (UWord**)arg;
   6712          // MC_(blocks_leaked) et al were set by the last leak check (or zero
   6713          // if no prior leak checks performed).
   6714          *argp[1] = MC_(blocks_leaked) + MC_(blocks_indirect);
   6715          *argp[2] = MC_(blocks_dubious);
   6716          *argp[3] = MC_(blocks_reachable);
   6717          *argp[4] = MC_(blocks_suppressed);
   6718          // there is no argp[5]
   6719          //*argp[5] = MC_(blocks_indirect);
   6720          // XXX need to make *argp[1-4] defined;  currently done in the
   6721          // VALGRIND_COUNT_LEAK_BLOCKS_MACRO by initialising them to zero.
   6722          *ret = 0;
   6723          return True;
   6724       }
   6725       case VG_USERREQ__MALLOCLIKE_BLOCK: {
   6726          Addr p         = (Addr)arg[1];
   6727          SizeT sizeB    =       arg[2];
   6728          UInt rzB       =       arg[3];
   6729          Bool is_zeroed = (Bool)arg[4];
   6730 
   6731          MC_(new_block) ( tid, p, sizeB, /*ignored*/0, is_zeroed,
   6732                           MC_AllocCustom, MC_(malloc_list) );
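                  /* If a redzone size was supplied, mark the redzones just
                     below and just above the block as unaddressable. */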
   6733          if (rzB > 0) {
   6734             MC_(make_mem_noaccess) ( p - rzB, rzB);
   6735             MC_(make_mem_noaccess) ( p + sizeB, rzB);
   6736          }
   6737          return True;
   6738       }
   6739       case VG_USERREQ__RESIZEINPLACE_BLOCK: {
   6740          Addr p         = (Addr)arg[1];
   6741          SizeT oldSizeB =       arg[2];
   6742          SizeT newSizeB =       arg[3];
   6743          UInt rzB       =       arg[4];
   6744 
   6745          MC_(handle_resizeInPlace) ( tid, p, oldSizeB, newSizeB, rzB );
   6746          return True;
   6747       }
   6748       case VG_USERREQ__FREELIKE_BLOCK: {
   6749          Addr p         = (Addr)arg[1];
   6750          UInt rzB       =       arg[2];
   6751 
   6752          MC_(handle_free) ( tid, p, rzB, MC_AllocCustom );
   6753          return True;
   6754       }
   6755 
   6756       case _VG_USERREQ__MEMCHECK_RECORD_OVERLAP_ERROR: {
   6757          HChar* s  = (HChar*)arg[1];
   6758          Addr  dst = (Addr) arg[2];
   6759          Addr  src = (Addr) arg[3];
   6760          SizeT len = (SizeT)arg[4];
   6761          MC_(record_overlap_error)(tid, s, src, dst, len);
   6762          return True;
   6763       }
   6764 
   6765       case VG_USERREQ__CREATE_MEMPOOL: {
   6766          Addr pool      = (Addr)arg[1];
   6767          UInt rzB       =       arg[2];
   6768          Bool is_zeroed = (Bool)arg[3];
   6769 
   6770          MC_(create_mempool) ( pool, rzB, is_zeroed );
   6771          return True;
   6772       }
   6773 
   6774       case VG_USERREQ__DESTROY_MEMPOOL: {
   6775          Addr pool      = (Addr)arg[1];
   6776 
   6777          MC_(destroy_mempool) ( pool );
   6778          return True;
   6779       }
   6780 
   6781       case VG_USERREQ__MEMPOOL_ALLOC: {
   6782          Addr pool      = (Addr)arg[1];
   6783          Addr addr      = (Addr)arg[2];
   6784          UInt size      =       arg[3];
   6785 
   6786          MC_(mempool_alloc) ( tid, pool, addr, size );
   6787          return True;
   6788       }
   6789 
   6790       case VG_USERREQ__MEMPOOL_FREE: {
   6791          Addr pool      = (Addr)arg[1];
   6792          Addr addr      = (Addr)arg[2];
   6793 
   6794          MC_(mempool_free) ( pool, addr );
   6795          return True;
   6796       }
   6797 
   6798       case VG_USERREQ__MEMPOOL_TRIM: {
   6799          Addr pool      = (Addr)arg[1];
   6800          Addr addr      = (Addr)arg[2];
   6801          UInt size      =       arg[3];
   6802 
   6803          MC_(mempool_trim) ( pool, addr, size );
   6804          return True;
   6805       }
   6806 
   6807       case VG_USERREQ__MOVE_MEMPOOL: {
   6808          Addr poolA     = (Addr)arg[1];
   6809          Addr poolB     = (Addr)arg[2];
   6810 
   6811          MC_(move_mempool) ( poolA, poolB );
   6812          return True;
   6813       }
   6814 
   6815       case VG_USERREQ__MEMPOOL_CHANGE: {
   6816          Addr pool      = (Addr)arg[1];
   6817          Addr addrA     = (Addr)arg[2];
   6818          Addr addrB     = (Addr)arg[3];
   6819          UInt size      =       arg[4];
   6820 
   6821          MC_(mempool_change) ( pool, addrA, addrB, size );
   6822          return True;
   6823       }
   6824 
   6825       case VG_USERREQ__MEMPOOL_EXISTS: {
   6826          Addr pool      = (Addr)arg[1];
   6827 
   6828          *ret = (UWord) MC_(mempool_exists) ( pool );
   6829          return True;
   6830       }
   6831 
   6832       case VG_USERREQ__GDB_MONITOR_COMMAND: {
   6833          Bool handled = handle_gdb_monitor_command (tid, (HChar*)arg[1]);
   6834          if (handled)
   6835             *ret = 1;
   6836          else
   6837             *ret = 0;
   6838          return handled;
   6839       }
   6840 
   6841       case VG_USERREQ__DISABLE_ADDR_ERROR_REPORTING_IN_RANGE:
   6842       case VG_USERREQ__ENABLE_ADDR_ERROR_REPORTING_IN_RANGE: {
   6843          Bool addRange
   6844             = arg[0] == VG_USERREQ__DISABLE_ADDR_ERROR_REPORTING_IN_RANGE;
   6845          Bool ok
   6846             = modify_ignore_ranges(addRange, arg[1], arg[2]);
   6847          *ret = ok ? 1 : 0;
   6848          return True;
   6849       }
   6850 
   6851       default:
   6852          VG_(message)(
   6853             Vg_UserMsg,
   6854             "Warning: unknown memcheck client request code %llx\n",
   6855             (ULong)arg[0]
   6856          );
   6857          return False;
   6858    }
   6859    return True;
   6860 }
   6861 
   6862 
   6863 /*------------------------------------------------------------*/
   6864 /*--- Crude profiling machinery.                           ---*/
   6865 /*------------------------------------------------------------*/
   6866 
   6867 // We track a number of interesting events (using PROF_EVENT)
   6868 // if MC_PROFILE_MEMORY is defined.
   6869 
   6870 #ifdef MC_PROFILE_MEMORY
   6871 
   6872 ULong  MC_(event_ctr)[MCPE_LAST];
   6873 
   6874 /* Event counter names. Use the name of the function that increases the
   6875    event counter. Drop any MC_() and mc_ prefixes. */
   6876 static const HChar* MC_(event_ctr_name)[MCPE_LAST] = {
   6877    [MCPE_LOADVN_SLOW] = "LOADVn_slow",
   6878    [MCPE_LOADVN_SLOW_LOOP] = "LOADVn_slow_loop",
   6879    [MCPE_STOREVN_SLOW] = "STOREVn_slow",
   6880    [MCPE_STOREVN_SLOW_LOOP] = "STOREVn_slow(loop)",
   6881    [MCPE_MAKE_ALIGNED_WORD32_UNDEFINED] = "make_aligned_word32_undefined",
   6882    [MCPE_MAKE_ALIGNED_WORD32_UNDEFINED_SLOW] =
   6883         "make_aligned_word32_undefined_slow",
   6884    [MCPE_MAKE_ALIGNED_WORD64_UNDEFINED] = "make_aligned_word64_undefined",
   6885    [MCPE_MAKE_ALIGNED_WORD64_UNDEFINED_SLOW] =
   6886         "make_aligned_word64_undefined_slow",
   6887    [MCPE_MAKE_ALIGNED_WORD32_NOACCESS] = "make_aligned_word32_noaccess",
   6888    [MCPE_MAKE_ALIGNED_WORD32_NOACCESS_SLOW] =
   6889          "make_aligned_word32_noaccess_slow",
   6890    [MCPE_MAKE_ALIGNED_WORD64_NOACCESS] = "make_aligned_word64_noaccess",
   6891    [MCPE_MAKE_ALIGNED_WORD64_NOACCESS_SLOW] =
   6892         "make_aligned_word64_noaccess_slow",
   6893    [MCPE_MAKE_MEM_NOACCESS] = "make_mem_noaccess",
   6894    [MCPE_MAKE_MEM_UNDEFINED] = "make_mem_undefined",
   6895    [MCPE_MAKE_MEM_UNDEFINED_W_OTAG] = "make_mem_undefined_w_otag",
   6896    [MCPE_MAKE_MEM_DEFINED] = "make_mem_defined",
   6897    [MCPE_CHEAP_SANITY_CHECK] = "cheap_sanity_check",
   6898    [MCPE_EXPENSIVE_SANITY_CHECK] = "expensive_sanity_check",
   6899    [MCPE_COPY_ADDRESS_RANGE_STATE] = "copy_address_range_state",
   6900    [MCPE_COPY_ADDRESS_RANGE_STATE_LOOP1] = "copy_address_range_state(loop1)",
   6901    [MCPE_COPY_ADDRESS_RANGE_STATE_LOOP2] = "copy_address_range_state(loop2)",
   6902    [MCPE_CHECK_MEM_IS_NOACCESS] = "check_mem_is_noaccess",
   6903    [MCPE_CHECK_MEM_IS_NOACCESS_LOOP] = "check_mem_is_noaccess(loop)",
   6904    [MCPE_IS_MEM_ADDRESSABLE] = "is_mem_addressable",
   6905    [MCPE_IS_MEM_ADDRESSABLE_LOOP] = "is_mem_addressable(loop)",
   6906    [MCPE_IS_MEM_DEFINED] = "is_mem_defined",
   6907    [MCPE_IS_MEM_DEFINED_LOOP] = "is_mem_defined(loop)",
   6908    [MCPE_IS_MEM_DEFINED_COMPREHENSIVE] = "is_mem_defined_comprehensive",
   6909    [MCPE_IS_MEM_DEFINED_COMPREHENSIVE_LOOP] =
   6910         "is_mem_defined_comprehensive(loop)",
   6911    [MCPE_IS_DEFINED_ASCIIZ] = "is_defined_asciiz",
   6912    [MCPE_IS_DEFINED_ASCIIZ_LOOP] = "is_defined_asciiz(loop)",
   6913    [MCPE_FIND_CHUNK_FOR_OLD] = "find_chunk_for_OLD",
   6914    [MCPE_FIND_CHUNK_FOR_OLD_LOOP] = "find_chunk_for_OLD(loop)",
   6915    [MCPE_SET_ADDRESS_RANGE_PERMS] = "set_address_range_perms",
   6916    [MCPE_SET_ADDRESS_RANGE_PERMS_SINGLE_SECMAP] =
   6917         "set_address_range_perms(single-secmap)",
   6918    [MCPE_SET_ADDRESS_RANGE_PERMS_STARTOF_SECMAP] =
   6919         "set_address_range_perms(startof-secmap)",
   6920    [MCPE_SET_ADDRESS_RANGE_PERMS_MULTIPLE_SECMAPS] =
   6921         "set_address_range_perms(multiple-secmaps)",
   6922    [MCPE_SET_ADDRESS_RANGE_PERMS_DIST_SM1] =
   6923         "set_address_range_perms(dist-sm1)",
   6924    [MCPE_SET_ADDRESS_RANGE_PERMS_DIST_SM2] =
   6925         "set_address_range_perms(dist-sm2)",
   6926    [MCPE_SET_ADDRESS_RANGE_PERMS_DIST_SM1_QUICK] =
   6927         "set_address_range_perms(dist-sm1-quick)",
   6928    [MCPE_SET_ADDRESS_RANGE_PERMS_DIST_SM2_QUICK] =
   6929         "set_address_range_perms(dist-sm2-quick)",
   6930    [MCPE_SET_ADDRESS_RANGE_PERMS_LOOP1A] = "set_address_range_perms(loop1a)",
   6931    [MCPE_SET_ADDRESS_RANGE_PERMS_LOOP1B] = "set_address_range_perms(loop1b)",
   6932    [MCPE_SET_ADDRESS_RANGE_PERMS_LOOP1C] = "set_address_range_perms(loop1c)",
   6933    [MCPE_SET_ADDRESS_RANGE_PERMS_LOOP8A] = "set_address_range_perms(loop8a)",
   6934    [MCPE_SET_ADDRESS_RANGE_PERMS_LOOP8B] = "set_address_range_perms(loop8b)",
   6935    [MCPE_SET_ADDRESS_RANGE_PERMS_LOOP64K] = "set_address_range_perms(loop64K)",
   6936    [MCPE_SET_ADDRESS_RANGE_PERMS_LOOP64K_FREE_DIST_SM] =
   6937         "set_address_range_perms(loop64K-free-dist-sm)",
   6938    [MCPE_LOADV_128_OR_256_SLOW_LOOP] = "LOADV_128_or_256_slow(loop)",
   6939    [MCPE_LOADV_128_OR_256]       = "LOADV_128_or_256",
   6940    [MCPE_LOADV_128_OR_256_SLOW1] = "LOADV_128_or_256-slow1",
   6941    [MCPE_LOADV_128_OR_256_SLOW2] = "LOADV_128_or_256-slow2",
   6942    [MCPE_LOADV64]        = "LOADV64",
   6943    [MCPE_LOADV64_SLOW1]  = "LOADV64-slow1",
   6944    [MCPE_LOADV64_SLOW2]  = "LOADV64-slow2",
   6945    [MCPE_STOREV64]       = "STOREV64",
   6946    [MCPE_STOREV64_SLOW1] = "STOREV64-slow1",
   6947    [MCPE_STOREV64_SLOW2] = "STOREV64-slow2",
   6948    [MCPE_STOREV64_SLOW3] = "STOREV64-slow3",
   6949    [MCPE_STOREV64_SLOW4] = "STOREV64-slow4",
   6950    [MCPE_LOADV32]        = "LOADV32",
   6951    [MCPE_LOADV32_SLOW1]  = "LOADV32-slow1",
   6952    [MCPE_LOADV32_SLOW2]  = "LOADV32-slow2",
   6953    [MCPE_STOREV32]       = "STOREV32",
   6954    [MCPE_STOREV32_SLOW1] = "STOREV32-slow1",
   6955    [MCPE_STOREV32_SLOW2] = "STOREV32-slow2",
   6956    [MCPE_STOREV32_SLOW3] = "STOREV32-slow3",
   6957    [MCPE_STOREV32_SLOW4] = "STOREV32-slow4",
   6958    [MCPE_LOADV16]        = "LOADV16",
   6959    [MCPE_LOADV16_SLOW1]  = "LOADV16-slow1",
   6960    [MCPE_LOADV16_SLOW2]  = "LOADV16-slow2",
   6961    [MCPE_STOREV16]       = "STOREV16",
   6962    [MCPE_STOREV16_SLOW1] = "STOREV16-slow1",
   6963    [MCPE_STOREV16_SLOW2] = "STOREV16-slow2",
   6964    [MCPE_STOREV16_SLOW3] = "STOREV16-slow3",
   6965    [MCPE_STOREV16_SLOW4] = "STOREV16-slow4",
   6966    [MCPE_LOADV8]         = "LOADV8",
   6967    [MCPE_LOADV8_SLOW1]   = "LOADV8-slow1",
   6968    [MCPE_LOADV8_SLOW2]   = "LOADV8-slow2",
   6969    [MCPE_STOREV8]        = "STOREV8",
   6970    [MCPE_STOREV8_SLOW1]  = "STOREV8-slow1",
   6971    [MCPE_STOREV8_SLOW2]  = "STOREV8-slow2",
   6972    [MCPE_STOREV8_SLOW3]  = "STOREV8-slow3",
   6973    [MCPE_STOREV8_SLOW4]  = "STOREV8-slow4",
   6974    [MCPE_NEW_MEM_STACK_4]   = "new_mem_stack_4",
   6975    [MCPE_NEW_MEM_STACK_8]   = "new_mem_stack_8",
   6976    [MCPE_NEW_MEM_STACK_12]  = "new_mem_stack_12",
   6977    [MCPE_NEW_MEM_STACK_16]  = "new_mem_stack_16",
   6978    [MCPE_NEW_MEM_STACK_32]  = "new_mem_stack_32",
   6979    [MCPE_NEW_MEM_STACK_112] = "new_mem_stack_112",
   6980    [MCPE_NEW_MEM_STACK_128] = "new_mem_stack_128",
   6981    [MCPE_NEW_MEM_STACK_144] = "new_mem_stack_144",
   6982    [MCPE_NEW_MEM_STACK_160] = "new_mem_stack_160",
   6983    [MCPE_DIE_MEM_STACK_4]   = "die_mem_stack_4",
   6984    [MCPE_DIE_MEM_STACK_8]   = "die_mem_stack_8",
   6985    [MCPE_DIE_MEM_STACK_12]  = "die_mem_stack_12",
   6986    [MCPE_DIE_MEM_STACK_16]  = "die_mem_stack_16",
   6987    [MCPE_DIE_MEM_STACK_32]  = "die_mem_stack_32",
   6988    [MCPE_DIE_MEM_STACK_112] = "die_mem_stack_112",
   6989    [MCPE_DIE_MEM_STACK_128] = "die_mem_stack_128",
   6990    [MCPE_DIE_MEM_STACK_144] = "die_mem_stack_144",
   6991    [MCPE_DIE_MEM_STACK_160] = "die_mem_stack_160",
   6992    [MCPE_NEW_MEM_STACK]     = "new_mem_stack",
   6993    [MCPE_DIE_MEM_STACK]     = "die_mem_stack",
   6994 };
   6995 
   6996 static void init_prof_mem ( void )
   6997 {
   6998    Int i, name_count = 0;
   6999 
   7000    for (i = 0; i < MCPE_LAST; i++) {
   7001       MC_(event_ctr)[i] = 0;
   7002       if (MC_(event_ctr_name)[i] != NULL)
   7003          ++name_count;
   7004    }
   7005 
   7006    /* Make sure every profiling event has a name */
   7007    tl_assert(name_count == MCPE_LAST);
   7008 }
   7009 
   7010 static void done_prof_mem ( void )
   7011 {
   7012    Int  i, n;
   7013    Bool spaced = False;
   7014    for (i = n = 0; i < MCPE_LAST; i++) {
   7015       if (!spaced && (n % 10) == 0) {
   7016          VG_(printf)("\n");
   7017          spaced = True;
   7018       }
   7019       if (MC_(event_ctr)[i] > 0) {
   7020          spaced = False;
   7021          ++n;
   7022          VG_(printf)( "prof mem event %3d: %11llu   %s\n",
   7023                       i, MC_(event_ctr)[i],
   7024                       MC_(event_ctr_name)[i]);
   7025       }
   7026    }
   7027 }
   7028 
   7029 #else
   7030 
   7031 static void init_prof_mem ( void ) { }
   7032 static void done_prof_mem ( void ) { }
   7033 
   7034 #endif
   7035 
   7036 
   7037 /*------------------------------------------------------------*/
   7038 /*--- Origin tracking stuff                                ---*/
   7039 /*------------------------------------------------------------*/
   7040 
   7041 /*--------------------------------------------*/
   7042 /*--- Origin tracking: load handlers       ---*/
   7043 /*--------------------------------------------*/
   7044 
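         /* In the handlers below, each OCacheLine tracks origins at 32-bit
            granularity: descr[lineoff] holds one bit per byte of the 32-bit
            word (hence the "descr < 0x10" assertions), a set bit meaning that
            byte's origin is the tag stored in w32[lineoff]; a tag of zero
            means "no origin".  When the two halves of a wider load both
            carry an origin, merge_origins keeps the larger of the two tags. */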
   7045 static INLINE UInt merge_origins ( UInt or1, UInt or2 ) {
   7046    return or1 > or2 ? or1 : or2;
   7047 }
   7048 
   7049 UWord VG_REGPARM(1) MC_(helperc_b_load1)( Addr a ) {
   7050    OCacheLine* line;
   7051    UChar descr;
   7052    UWord lineoff = oc_line_offset(a);
   7053    UWord byteoff = a & 3; /* 0, 1, 2 or 3 */
   7054 
   7055    if (OC_ENABLE_ASSERTIONS) {
   7056       tl_assert(lineoff >= 0 && lineoff < OC_W32S_PER_LINE);
   7057    }
   7058 
   7059    line = find_OCacheLine( a );
   7060 
   7061    descr = line->descr[lineoff];
   7062    if (OC_ENABLE_ASSERTIONS) {
   7063       tl_assert(descr < 0x10);
   7064    }
   7065 
   7066    if (LIKELY(0 == (descr & (1 << byteoff))))  {
   7067       return 0;
   7068    } else {
   7069       return line->w32[lineoff];
   7070    }
   7071 }
   7072 
   7073 UWord VG_REGPARM(1) MC_(helperc_b_load2)( Addr a ) {
   7074    OCacheLine* line;
   7075    UChar descr;
   7076    UWord lineoff, byteoff;
   7077 
   7078    if (UNLIKELY(a & 1)) {
   7079       /* Handle misaligned case, slowly. */
   7080       UInt oLo   = (UInt)MC_(helperc_b_load1)( a + 0 );
   7081       UInt oHi   = (UInt)MC_(helperc_b_load1)( a + 1 );
   7082       return merge_origins(oLo, oHi);
   7083    }
   7084 
   7085    lineoff = oc_line_offset(a);
   7086    byteoff = a & 3; /* 0 or 2 */
   7087 
   7088    if (OC_ENABLE_ASSERTIONS) {
   7089       tl_assert(lineoff >= 0 && lineoff < OC_W32S_PER_LINE);
   7090    }
   7091    line = find_OCacheLine( a );
   7092 
   7093    descr = line->descr[lineoff];
   7094    if (OC_ENABLE_ASSERTIONS) {
   7095       tl_assert(descr < 0x10);
   7096    }
   7097 
   7098    if (LIKELY(0 == (descr & (3 << byteoff)))) {
   7099       return 0;
   7100    } else {
   7101       return line->w32[lineoff];
   7102    }
   7103 }
   7104 
   7105 UWord VG_REGPARM(1) MC_(helperc_b_load4)( Addr a ) {
   7106    OCacheLine* line;
   7107    UChar descr;
   7108    UWord lineoff;
   7109 
   7110    if (UNLIKELY(a & 3)) {
   7111       /* Handle misaligned case, slowly. */
   7112       UInt oLo   = (UInt)MC_(helperc_b_load2)( a + 0 );
   7113       UInt oHi   = (UInt)MC_(helperc_b_load2)( a + 2 );
   7114       return merge_origins(oLo, oHi);
   7115    }
   7116 
   7117    lineoff = oc_line_offset(a);
   7118    if (OC_ENABLE_ASSERTIONS) {
   7119       tl_assert(lineoff >= 0 && lineoff < OC_W32S_PER_LINE);
   7120    }
   7121 
   7122    line = find_OCacheLine( a );
   7123 
   7124    descr = line->descr[lineoff];
   7125    if (OC_ENABLE_ASSERTIONS) {
   7126       tl_assert(descr < 0x10);
   7127    }
   7128 
   7129    if (LIKELY(0 == descr)) {
   7130       return 0;
   7131    } else {
   7132       return line->w32[lineoff];
   7133    }
   7134 }
   7135 
   7136 UWord VG_REGPARM(1) MC_(helperc_b_load8)( Addr a ) {
   7137    OCacheLine* line;
   7138    UChar descrLo, descrHi, descr;
   7139    UWord lineoff;
   7140 
   7141    if (UNLIKELY(a & 7)) {
   7142       /* Handle misaligned case, slowly. */
   7143       UInt oLo   = (UInt)MC_(helperc_b_load4)( a + 0 );
   7144       UInt oHi   = (UInt)MC_(helperc_b_load4)( a + 4 );
   7145       return merge_origins(oLo, oHi);
   7146    }
   7147 
   7148    lineoff = oc_line_offset(a);
   7149    if (OC_ENABLE_ASSERTIONS) {
   7150       tl_assert(lineoff == (lineoff & 6)); /*0,2,4,6*//*since 8-aligned*/
   7151    }
   7152 
   7153    line = find_OCacheLine( a );
   7154 
   7155    descrLo = line->descr[lineoff + 0];
   7156    descrHi = line->descr[lineoff + 1];
   7157    descr   = descrLo | descrHi;
   7158    if (OC_ENABLE_ASSERTIONS) {
   7159       tl_assert(descr < 0x10);
   7160    }
   7161 
   7162    if (LIKELY(0 == descr)) {
   7163       return 0; /* both 32-bit chunks are defined */
   7164    } else {
   7165       UInt oLo = descrLo == 0 ? 0 : line->w32[lineoff + 0];
   7166       UInt oHi = descrHi == 0 ? 0 : line->w32[lineoff + 1];
   7167       return merge_origins(oLo, oHi);
   7168    }
   7169 }
   7170 
   7171 UWord VG_REGPARM(1) MC_(helperc_b_load16)( Addr a ) {
   7172    UInt oLo   = (UInt)MC_(helperc_b_load8)( a + 0 );
   7173    UInt oHi   = (UInt)MC_(helperc_b_load8)( a + 8 );
   7174    UInt oBoth = merge_origins(oLo, oHi);
   7175    return (UWord)oBoth;
   7176 }
   7177 
   7178 UWord VG_REGPARM(1) MC_(helperc_b_load32)( Addr a ) {
   7179    UInt oQ0   = (UInt)MC_(helperc_b_load8)( a + 0 );
   7180    UInt oQ1   = (UInt)MC_(helperc_b_load8)( a + 8 );
   7181    UInt oQ2   = (UInt)MC_(helperc_b_load8)( a + 16 );
   7182    UInt oQ3   = (UInt)MC_(helperc_b_load8)( a + 24 );
   7183    UInt oAll  = merge_origins(merge_origins(oQ0, oQ1),
   7184                               merge_origins(oQ2, oQ3));
   7185    return (UWord)oAll;
   7186 }
   7187 
   7188 
   7189 /*--------------------------------------------*/
   7190 /*--- Origin tracking: store handlers      ---*/
   7191 /*--------------------------------------------*/
   7192 
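         /* The store handlers mirror the loads: storing a nonzero tag sets
            the descr bits for the bytes written and records the tag in
            w32[lineoff]; storing a zero tag just clears the descr bits. */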
   7193 void VG_REGPARM(2) MC_(helperc_b_store1)( Addr a, UWord d32 ) {
   7194    OCacheLine* line;
   7195    UWord lineoff = oc_line_offset(a);
   7196    UWord byteoff = a & 3; /* 0, 1, 2 or 3 */
   7197 
   7198    if (OC_ENABLE_ASSERTIONS) {
   7199       tl_assert(lineoff >= 0 && lineoff < OC_W32S_PER_LINE);
   7200    }
   7201 
   7202    line = find_OCacheLine( a );
   7203 
   7204    if (d32 == 0) {
   7205       line->descr[lineoff] &= ~(1 << byteoff);
   7206    } else {
   7207       line->descr[lineoff] |= (1 << byteoff);
   7208       line->w32[lineoff] = d32;
   7209    }
   7210 }
   7211 
   7212 void VG_REGPARM(2) MC_(helperc_b_store2)( Addr a, UWord d32 ) {
   7213    OCacheLine* line;
   7214    UWord lineoff, byteoff;
   7215 
   7216    if (UNLIKELY(a & 1)) {
   7217       /* Handle misaligned case, slowly. */
   7218       MC_(helperc_b_store1)( a + 0, d32 );
   7219       MC_(helperc_b_store1)( a + 1, d32 );
   7220       return;
   7221    }
   7222 
   7223    lineoff = oc_line_offset(a);
   7224    byteoff = a & 3; /* 0 or 2 */
   7225 
   7226    if (OC_ENABLE_ASSERTIONS) {
   7227       tl_assert(lineoff >= 0 && lineoff < OC_W32S_PER_LINE);
   7228    }
   7229 
   7230    line = find_OCacheLine( a );
   7231 
   7232    if (d32 == 0) {
   7233       line->descr[lineoff] &= ~(3 << byteoff);
   7234    } else {
   7235       line->descr[lineoff] |= (3 << byteoff);
   7236       line->w32[lineoff] = d32;
   7237    }
   7238 }
   7239 
   7240 void VG_REGPARM(2) MC_(helperc_b_store4)( Addr a, UWord d32 ) {
   7241    OCacheLine* line;
   7242    UWord lineoff;
   7243 
   7244    if (UNLIKELY(a & 3)) {
   7245       /* Handle misaligned case, slowly. */
   7246       MC_(helperc_b_store2)( a + 0, d32 );
   7247       MC_(helperc_b_store2)( a + 2, d32 );
   7248       return;
   7249    }
   7250 
   7251    lineoff = oc_line_offset(a);
   7252    if (OC_ENABLE_ASSERTIONS) {
   7253       tl_assert(lineoff >= 0 && lineoff < OC_W32S_PER_LINE);
   7254    }
   7255 
   7256    line = find_OCacheLine( a );
   7257 
   7258    if (d32 == 0) {
   7259       line->descr[lineoff] = 0;
   7260    } else {
   7261       line->descr[lineoff] = 0xF;
   7262       line->w32[lineoff] = d32;
   7263    }
   7264 }
   7265 
   7266 void VG_REGPARM(2) MC_(helperc_b_store8)( Addr a, UWord d32 ) {
   7267    OCacheLine* line;
   7268    UWord lineoff;
   7269 
   7270    if (UNLIKELY(a & 7)) {
   7271       /* Handle misaligned case, slowly. */
   7272       MC_(helperc_b_store4)( a + 0, d32 );
   7273       MC_(helperc_b_store4)( a + 4, d32 );
   7274       return;
   7275    }
   7276 
   7277    lineoff = oc_line_offset(a);
   7278    if (OC_ENABLE_ASSERTIONS) {
   7279       tl_assert(lineoff == (lineoff & 6)); /*0,2,4,6*//*since 8-aligned*/
   7280    }
   7281 
   7282    line = find_OCacheLine( a );
   7283 
   7284    if (d32 == 0) {
   7285       line->descr[lineoff + 0] = 0;
   7286       line->descr[lineoff + 1] = 0;
   7287    } else {
   7288       line->descr[lineoff + 0] = 0xF;
   7289       line->descr[lineoff + 1] = 0xF;
   7290       line->w32[lineoff + 0] = d32;
   7291       line->w32[lineoff + 1] = d32;
   7292    }
   7293 }
   7294 
   7295 void VG_REGPARM(2) MC_(helperc_b_store16)( Addr a, UWord d32 ) {
   7296    MC_(helperc_b_store8)( a + 0, d32 );
   7297    MC_(helperc_b_store8)( a + 8, d32 );
   7298 }
   7299 
   7300 void VG_REGPARM(2) MC_(helperc_b_store32)( Addr a, UWord d32 ) {
   7301    MC_(helperc_b_store8)( a +  0, d32 );
   7302    MC_(helperc_b_store8)( a +  8, d32 );
   7303    MC_(helperc_b_store8)( a + 16, d32 );
   7304    MC_(helperc_b_store8)( a + 24, d32 );
   7305 }
   7306 
   7307 
   7308 /*--------------------------------------------*/
   7309 /*--- Origin tracking: sarp handlers       ---*/
   7310 /*--------------------------------------------*/
   7311 
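         /* Set or clear origin tags over an arbitrary address range: peel off
            a leading byte and/or halfword until the address is 4-aligned, do
            the bulk of the range with 4-byte stores, then mop up a trailing
            halfword and/or byte. */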
   7312 __attribute__((noinline))
   7313 static void ocache_sarp_Set_Origins ( Addr a, UWord len, UInt otag ) {
   7314    if ((a & 1) && len >= 1) {
   7315       MC_(helperc_b_store1)( a, otag );
   7316       a++;
   7317       len--;
   7318    }
   7319    if ((a & 2) && len >= 2) {
   7320       MC_(helperc_b_store2)( a, otag );
   7321       a += 2;
   7322       len -= 2;
   7323    }
   7324    if (len >= 4)
   7325       tl_assert(0 == (a & 3));
   7326    while (len >= 4) {
   7327       MC_(helperc_b_store4)( a, otag );
   7328       a += 4;
   7329       len -= 4;
   7330    }
   7331    if (len >= 2) {
   7332       MC_(helperc_b_store2)( a, otag );
   7333       a += 2;
   7334       len -= 2;
   7335    }
   7336    if (len >= 1) {
   7337       MC_(helperc_b_store1)( a, otag );
   7338       //a++;
   7339       len--;
   7340    }
   7341    tl_assert(len == 0);
   7342 }
   7343 
   7344 __attribute__((noinline))
   7345 static void ocache_sarp_Clear_Origins ( Addr a, UWord len ) {
   7346    if ((a & 1) && len >= 1) {
   7347       MC_(helperc_b_store1)( a, 0 );
   7348       a++;
   7349       len--;
   7350    }
   7351    if ((a & 2) && len >= 2) {
   7352       MC_(helperc_b_store2)( a, 0 );
   7353       a += 2;
   7354       len -= 2;
   7355    }
   7356    if (len >= 4)
   7357       tl_assert(0 == (a & 3));
   7358    while (len >= 4) {
   7359       MC_(helperc_b_store4)( a, 0 );
   7360       a += 4;
   7361       len -= 4;
   7362    }
   7363    if (len >= 2) {
   7364       MC_(helperc_b_store2)( a, 0 );
   7365       a += 2;
   7366       len -= 2;
   7367    }
   7368    if (len >= 1) {
   7369       MC_(helperc_b_store1)( a, 0 );
   7370       //a++;
   7371       len--;
   7372    }
   7373    tl_assert(len == 0);
   7374 }
   7375 
   7376 
   7377 /*------------------------------------------------------------*/
   7378 /*--- Setup and finalisation                               ---*/
   7379 /*------------------------------------------------------------*/
   7380 
   7381 static void mc_post_clo_init ( void )
   7382 {
   7383    /* If we've been asked to emit XML, mash around various other
   7384       options so as to constrain the output somewhat. */
   7385    if (VG_(clo_xml)) {
   7386       /* Extract as much info as possible from the leak checker. */
   7387       MC_(clo_leak_check) = LC_Full;
   7388    }
   7389 
   7390    if (MC_(clo_freelist_big_blocks) >= MC_(clo_freelist_vol))
   7391       VG_(message)(Vg_UserMsg,
   7392                    "Warning: --freelist-big-blocks value %lld has no effect\n"
   7393                    "as it is >= the --freelist-vol value %lld\n",
   7394                    MC_(clo_freelist_big_blocks),
   7395                    MC_(clo_freelist_vol));
   7396 
   7397    tl_assert( MC_(clo_mc_level) >= 1 && MC_(clo_mc_level) <= 3 );
   7398 
   7399    if (MC_(clo_mc_level) == 3) {
   7400       /* We're doing origin tracking. */
   7401 #     ifdef PERF_FAST_STACK
   7402       VG_(track_new_mem_stack_4_w_ECU)   ( mc_new_mem_stack_4_w_ECU   );
   7403       VG_(track_new_mem_stack_8_w_ECU)   ( mc_new_mem_stack_8_w_ECU   );
   7404       VG_(track_new_mem_stack_12_w_ECU)  ( mc_new_mem_stack_12_w_ECU  );
   7405       VG_(track_new_mem_stack_16_w_ECU)  ( mc_new_mem_stack_16_w_ECU  );
   7406       VG_(track_new_mem_stack_32_w_ECU)  ( mc_new_mem_stack_32_w_ECU  );
   7407       VG_(track_new_mem_stack_112_w_ECU) ( mc_new_mem_stack_112_w_ECU );
   7408       VG_(track_new_mem_stack_128_w_ECU) ( mc_new_mem_stack_128_w_ECU );
   7409       VG_(track_new_mem_stack_144_w_ECU) ( mc_new_mem_stack_144_w_ECU );
   7410       VG_(track_new_mem_stack_160_w_ECU) ( mc_new_mem_stack_160_w_ECU );
   7411 #     endif
   7412       VG_(track_new_mem_stack_w_ECU)     ( mc_new_mem_stack_w_ECU     );
   7413       VG_(track_new_mem_stack_signal)    ( mc_new_mem_w_tid_make_ECU );
   7414    } else {
   7415       /* Not doing origin tracking */
   7416 #     ifdef PERF_FAST_STACK
   7417       VG_(track_new_mem_stack_4)   ( mc_new_mem_stack_4   );
   7418       VG_(track_new_mem_stack_8)   ( mc_new_mem_stack_8   );
   7419       VG_(track_new_mem_stack_12)  ( mc_new_mem_stack_12  );
   7420       VG_(track_new_mem_stack_16)  ( mc_new_mem_stack_16  );
   7421       VG_(track_new_mem_stack_32)  ( mc_new_mem_stack_32  );
   7422       VG_(track_new_mem_stack_112) ( mc_new_mem_stack_112 );
   7423       VG_(track_new_mem_stack_128) ( mc_new_mem_stack_128 );
   7424       VG_(track_new_mem_stack_144) ( mc_new_mem_stack_144 );
   7425       VG_(track_new_mem_stack_160) ( mc_new_mem_stack_160 );
   7426 #     endif
   7427       VG_(track_new_mem_stack)     ( mc_new_mem_stack     );
   7428       VG_(track_new_mem_stack_signal) ( mc_new_mem_w_tid_no_ECU );
   7429    }
   7430 
   7431    // We assume that brk()/sbrk() does not initialise new memory.  Is this
   7432    // accurate?  John Reiser says:
   7433    //
   7434    //   "0) sbrk() can *decrease* process address space.  No zero fill is done
   7435    //   for a decrease, not even the fragment on the high end of the last page
   7436    //   that is beyond the new highest address.  For maximum safety and
   7437    //   portability, then the bytes in the last page that reside above [the
   7438    //   new] sbrk(0) should be considered to be uninitialized, but in practice
   7439    //   it is exceedingly likely that they will retain their previous
   7440    //   contents.
   7441    //
   7442    //   1) If an increase is large enough to require new whole pages, then
   7443    //   those new whole pages (like all new pages) are zero-filled by the
   7444    //   operating system.  So if sbrk(0) already is page aligned, then
   7445    //   sbrk(PAGE_SIZE) *does* zero-fill the new memory.
   7446    //
   7447    //   2) Any increase that lies within an existing allocated page is not
   7448    //   changed.  So if (x = sbrk(0)) is not page aligned, then
   7449    //   sbrk(PAGE_SIZE) yields ((PAGE_SIZE -1) & -x) bytes which keep their
   7450    //   existing contents, and an additional PAGE_SIZE bytes which are zeroed.
   7451    //   ((PAGE_SIZE -1) & x) of them are "covered" by the sbrk(), and the rest
   7452    //   of them come along for the ride because the operating system deals
   7453    //   only in whole pages.  Again, for maximum safety and portability, then
   7454    //   anything that lives above [the new] sbrk(0) should be considered
   7455    //   uninitialized, but in practice will retain previous contents [zero in
   7456    //   this case.]"
   7457    //
   7458    // In short:
   7459    //
   7460    //   A key property of sbrk/brk is that new whole pages that are supplied
   7461    //   by the operating system *do* get initialized to zero.
   7462    //
   7463    // As for the portability of all this:
   7464    //
   7465    //   sbrk and brk are not POSIX.  However, any system that is a derivative
   7466    //   of *nix has sbrk and brk, because too much software (such as the
   7467    //   Bourne shell) relies on the traditional memory map (.text,
   7468    //   .data+.bss, stack) and the existence of sbrk/brk.
   7469    //
   7470    // So we should arguably observe all this.  However:
   7471    // - The current inaccuracy has caused maybe one complaint in seven years(?)
   7472    // - Relying on the zeroed-ness of whole brk'd pages is pretty grotty... I
   7473    //   doubt most programmers know the above information.
   7474    // So I'm not terribly unhappy with marking it as undefined. --njn.
   7475    //
   7476    // [More:  I think most of what John said only applies to sbrk().  It seems
   7477    // that brk() always deals in whole pages.  And since this event deals
   7478    // directly with brk(), not with sbrk(), perhaps it would be reasonable to
   7479    // just mark all memory it allocates as defined.]
   7480    //
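            // A worked example of point (2), with purely illustrative numbers:
            // take PAGE_SIZE = 4096 and a break x whose low 12 bits are 0x234
            // (564).  Then sbrk(PAGE_SIZE) leaves (PAGE_SIZE-1) & -x = 3532
            // bytes of the current page with their old contents, and the OS
            // zero-fills one whole new page; (PAGE_SIZE-1) & x = 564 of those
            // zeroed bytes are covered by the request, and the rest lie above
            // the new break.
            //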
   7481 #  if !defined(VGO_solaris)
   7482    if (MC_(clo_mc_level) == 3)
   7483       VG_(track_new_mem_brk)         ( mc_new_mem_w_tid_make_ECU );
   7484    else
   7485       VG_(track_new_mem_brk)         ( mc_new_mem_w_tid_no_ECU );
   7486 #  else
   7487    // On Solaris, brk memory has to be marked as defined, otherwise we get
   7488    // many false positives.
   7489    VG_(track_new_mem_brk)         ( make_mem_defined_w_tid );
   7490 #  endif
   7491 
   7492    /* This origin tracking cache is huge (~100M), so only initialise
   7493       if we need it. */
   7494    if (MC_(clo_mc_level) >= 3) {
   7495       init_OCache();
   7496       tl_assert(ocacheL1 != NULL);
   7497       tl_assert(ocacheL2 != NULL);
   7498    } else {
   7499       tl_assert(ocacheL1 == NULL);
   7500       tl_assert(ocacheL2 == NULL);
   7501    }
   7502 
   7503    MC_(chunk_poolalloc) = VG_(newPA)
   7504       (sizeof(MC_Chunk) + MC_(n_where_pointers)() * sizeof(ExeContext*),
   7505        1000,
   7506        VG_(malloc),
   7507        "mc.cMC.1 (MC_Chunk pools)",
   7508        VG_(free));
   7509 
   7510    /* Do not check definedness of guest state if --undef-value-errors=no */
   7511    if (MC_(clo_mc_level) >= 2)
   7512       VG_(track_pre_reg_read) ( mc_pre_reg_read );
   7513 }
   7514 
   7515 static void print_SM_info(const HChar* type, Int n_SMs)
   7516 {
   7517    VG_(message)(Vg_DebugMsg,
   7518       " memcheck: SMs: %s = %d (%luk, %luM)\n",
   7519       type,
   7520       n_SMs,
   7521       n_SMs * sizeof(SecMap) / 1024UL,
   7522       n_SMs * sizeof(SecMap) / (1024 * 1024UL) );
   7523 }
   7524 
   7525 static void mc_print_stats (void)
   7526 {
   7527    SizeT max_secVBit_szB, max_SMs_szB, max_shmem_szB;
   7528 
   7529    VG_(message)(Vg_DebugMsg, " memcheck: freelist: vol %lld length %lld\n",
   7530                 VG_(free_queue_volume), VG_(free_queue_length));
   7531    VG_(message)(Vg_DebugMsg,
   7532       " memcheck: sanity checks: %d cheap, %d expensive\n",
   7533       n_sanity_cheap, n_sanity_expensive );
   7534    VG_(message)(Vg_DebugMsg,
   7535       " memcheck: auxmaps: %llu auxmap entries (%lluk, %lluM) in use\n",
   7536       n_auxmap_L2_nodes,
   7537       n_auxmap_L2_nodes * 64,
   7538       n_auxmap_L2_nodes / 16 );
   7539    VG_(message)(Vg_DebugMsg,
   7540       " memcheck: auxmaps_L1: %llu searches, %llu cmps, ratio %llu:10\n",
   7541       n_auxmap_L1_searches, n_auxmap_L1_cmps,
   7542       (10ULL * n_auxmap_L1_cmps)
   7543          / (n_auxmap_L1_searches ? n_auxmap_L1_searches : 1)
   7544    );
   7545    VG_(message)(Vg_DebugMsg,
   7546       " memcheck: auxmaps_L2: %llu searches, %llu nodes\n",
   7547       n_auxmap_L2_searches, n_auxmap_L2_nodes
   7548    );
   7549 
   7550    print_SM_info("n_issued     ", n_issued_SMs);
   7551    print_SM_info("n_deissued   ", n_deissued_SMs);
   7552    print_SM_info("max_noaccess ", max_noaccess_SMs);
   7553    print_SM_info("max_undefined", max_undefined_SMs);
   7554    print_SM_info("max_defined  ", max_defined_SMs);
   7555    print_SM_info("max_non_DSM  ", max_non_DSM_SMs);
   7556 
   7557    // Three DSMs, plus the non-DSM ones
   7558    max_SMs_szB = (3 + max_non_DSM_SMs) * sizeof(SecMap);
   7559    // The 3*sizeof(Word) bytes is the AVL node metadata size.
   7560    // The VG_ROUNDUP is because the OSet pool allocator will/must align
   7561    // the elements on pointer size.
   7562    // Note that the pool allocator has some additional small overhead
   7563    // which is not counted in the below.
   7564    // Hardwiring this logic sucks, but I don't see how else to do it.
   7565    max_secVBit_szB = max_secVBit_nodes *
   7566          (3*sizeof(Word) + VG_ROUNDUP(sizeof(SecVBitNode), sizeof(void*)));
   7567    max_shmem_szB   = sizeof(primary_map) + max_SMs_szB + max_secVBit_szB;
   7568 
   7569    VG_(message)(Vg_DebugMsg,
   7570       " memcheck: max sec V bit nodes:    %d (%luk, %luM)\n",
   7571       max_secVBit_nodes, max_secVBit_szB / 1024,
   7572                          max_secVBit_szB / (1024 * 1024));
   7573    VG_(message)(Vg_DebugMsg,
   7574       " memcheck: set_sec_vbits8 calls: %llu (new: %llu, updates: %llu)\n",
   7575       sec_vbits_new_nodes + sec_vbits_updates,
   7576       sec_vbits_new_nodes, sec_vbits_updates );
   7577    VG_(message)(Vg_DebugMsg,
   7578       " memcheck: max shadow mem size:   %luk, %luM\n",
   7579       max_shmem_szB / 1024, max_shmem_szB / (1024 * 1024));
   7580 
   7581    if (MC_(clo_mc_level) >= 3) {
   7582       VG_(message)(Vg_DebugMsg,
   7583                    " ocacheL1: %'12lu refs   %'12lu misses (%'lu lossage)\n",
   7584                    stats_ocacheL1_find,
   7585                    stats_ocacheL1_misses,
   7586                    stats_ocacheL1_lossage );
   7587       VG_(message)(Vg_DebugMsg,
   7588                    " ocacheL1: %'12lu at 0   %'12lu at 1\n",
   7589                    stats_ocacheL1_find - stats_ocacheL1_misses
   7590                       - stats_ocacheL1_found_at_1
   7591                       - stats_ocacheL1_found_at_N,
   7592                    stats_ocacheL1_found_at_1 );
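               /* The "at 0" figure is derived rather than counted directly:
                  it is the number of finds that were neither misses nor hits
                  recorded at slot 1 or at slots 2 and beyond. */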
   7593       VG_(message)(Vg_DebugMsg,
   7594                    " ocacheL1: %'12lu at 2+  %'12lu move-fwds\n",
   7595                    stats_ocacheL1_found_at_N,
   7596                    stats_ocacheL1_movefwds );
   7597       VG_(message)(Vg_DebugMsg,
   7598                    " ocacheL1: %'12lu sizeB  %'12d useful\n",
   7599                    (SizeT)sizeof(OCache),
   7600                    4 * OC_W32S_PER_LINE * OC_LINES_PER_SET * OC_N_SETS );
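               /* "sizeB" is the full in-memory size of the L1 structure;
                  "useful" is just the payload capacity, i.e. 4 bytes for each
                  32-bit word in every line of every set, excluding tags and
                  any padding. */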
   7601       VG_(message)(Vg_DebugMsg,
   7602                    " ocacheL2: %'12lu refs   %'12lu misses\n",
   7603                    stats__ocacheL2_refs,
   7604                    stats__ocacheL2_misses );
   7605       VG_(message)(Vg_DebugMsg,
   7606                    " ocacheL2:    %'9lu max nodes %'9lu curr nodes\n",
   7607                    stats__ocacheL2_n_nodes_max,
   7608                    stats__ocacheL2_n_nodes );
   7609       VG_(message)(Vg_DebugMsg,
   7610                    " niacache: %'12lu refs   %'12lu misses\n",
   7611                    stats__nia_cache_queries, stats__nia_cache_misses);
   7612    } else {
   7613       tl_assert(ocacheL1 == NULL);
   7614       tl_assert(ocacheL2 == NULL);
   7615    }
   7616 }
   7617 
   7618 
   7619 static void mc_fini ( Int exitcode )
   7620 {
   7621    MC_(print_malloc_stats)();
   7622 
   7623    if (MC_(clo_leak_check) != LC_Off) {
   7624       LeakCheckParams lcp;
   7625       lcp.mode = MC_(clo_leak_check);
   7626       lcp.show_leak_kinds = MC_(clo_show_leak_kinds);
   7627       lcp.heuristics = MC_(clo_leak_check_heuristics);
   7628       lcp.errors_for_leak_kinds = MC_(clo_error_for_leak_kinds);
   7629       lcp.deltamode = LCD_Any;
   7630       lcp.max_loss_records_output = 999999999;
   7631       lcp.requested_by_monitor_command = False;
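               /* At exit we want one complete report: every loss record (the
                  cap above is effectively unbounded), no restriction to
                  changes since an earlier check, and this is not a run driven
                  by a gdbserver monitor command. */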
   7632       MC_(detect_memory_leaks)(1/*bogus ThreadId*/, &lcp);
   7633    } else {
   7634       if (VG_(clo_verbosity) == 1 && !VG_(clo_xml)) {
   7635          VG_(umsg)(
   7636             "For a detailed leak analysis, rerun with: --leak-check=full\n"
   7637             "\n"
   7638          );
   7639       }
   7640    }
   7641 
   7642    if (VG_(clo_verbosity) == 1 && !VG_(clo_xml)) {
   7643       VG_(message)(Vg_UserMsg,
   7644                    "For counts of detected and suppressed errors, rerun with: -v\n");
   7645    }
   7646 
   7647    if (MC_(any_value_errors) && !VG_(clo_xml) && VG_(clo_verbosity) >= 1
   7648        && MC_(clo_mc_level) == 2) {
   7649       VG_(message)(Vg_UserMsg,
   7650                    "Use --track-origins=yes to see where "
   7651                    "uninitialised values come from\n");
   7652    }
   7653 
   7654    /* Print a warning if any client-request generated ignore-ranges
   7655       still exist.  It would be reasonable to expect that a properly
   7656       written program would remove any such ranges before exiting, and
    7657       since they are somewhat dangerous, warn about any that remain.  By
    7658       contrast, ranges which are specified on the command line normally
   7659       pertain to hardware mapped into the address space, and so we
   7660       can't expect the client to have got rid of them. */
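            /* A well-behaved client brackets its risky accesses roughly as
                  VALGRIND_DISABLE_..._IN_RANGE(addr, len);
                  ... touch the suspect memory ...
                  VALGRIND_ENABLE_..._IN_RANGE(addr, len);
               (a sketch only; see the client-request headers for the exact
               macro names).  Anything still disabled at exit is listed below. */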
   7661    if (gIgnoredAddressRanges) {
   7662       UInt i, nBad = 0;
   7663       for (i = 0; i < VG_(sizeRangeMap)(gIgnoredAddressRanges); i++) {
   7664          UWord val     = IAR_INVALID;
   7665          UWord key_min = ~(UWord)0;
   7666          UWord key_max = (UWord)0;
   7667          VG_(indexRangeMap)( &key_min, &key_max, &val,
   7668                              gIgnoredAddressRanges, i );
   7669          if (val != IAR_ClientReq)
   7670            continue;
   7671          /* Print the offending range.  Also, if it is the first,
   7672             print a banner before it. */
   7673          nBad++;
   7674          if (nBad == 1) {
   7675             VG_(umsg)(
   7676               "WARNING: exiting program has the following client-requested\n"
   7677               "WARNING: address error disablement range(s) still in force,\n"
   7678               "WARNING: "
   7679                  "possibly as a result of some mistake in the use of the\n"
   7680               "WARNING: "
   7681                  "VALGRIND_{DISABLE,ENABLE}_ERROR_REPORTING_IN_RANGE macros.\n"
   7682             );
   7683          }
   7684          VG_(umsg)("   [%u]  0x%016lx-0x%016lx  %s\n",
   7685                    i, key_min, key_max, showIARKind(val));
   7686       }
   7687    }
   7688 
   7689    done_prof_mem();
   7690 
   7691    if (VG_(clo_stats))
   7692       mc_print_stats();
   7693 
   7694    if (0) {
   7695       VG_(message)(Vg_DebugMsg,
   7696         "------ Valgrind's client block stats follow ---------------\n" );
   7697       show_client_block_stats();
   7698    }
   7699 }
   7700 
   7701 /* mark the given addr/len unaddressable for watchpoint implementation
   7702    The PointKind will be handled at access time */
   7703 static Bool mc_mark_unaddressable_for_watchpoint (PointKind kind, Bool insert,
   7704                                                   Addr addr, SizeT len)
   7705 {
    7706    /* GDBTD this is somewhat fishy. We should probably save the previous
    7707       accessibility and definedness in gdbserver so that they can be
    7708       restored properly. Currently, we assume that the user only watches
    7709       memory which is properly addressable and defined. */
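            /* Net effect: while a watchpoint is in place the range is
               no-access, so any client access to it is flagged by Memcheck's
               checks, and the PointKind decides at access time how it is
               reported; removing the watchpoint just marks the range defined
               again. */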
   7710    if (insert)
   7711       MC_(make_mem_noaccess) (addr, len);
   7712    else
   7713       MC_(make_mem_defined)  (addr, len);
   7714    return True;
   7715 }
   7716 
   7717 static void mc_pre_clo_init(void)
   7718 {
   7719    VG_(details_name)            ("Memcheck");
   7720    VG_(details_version)         (NULL);
   7721    VG_(details_description)     ("a memory error detector");
   7722    VG_(details_copyright_author)(
   7723       "Copyright (C) 2002-2015, and GNU GPL'd, by Julian Seward et al.");
   7724    VG_(details_bug_reports_to)  (VG_BUGS_TO);
   7725    VG_(details_avg_translation_sizeB) ( 640 );
   7726 
   7727    VG_(basic_tool_funcs)          (mc_post_clo_init,
   7728                                    MC_(instrument),
   7729                                    mc_fini);
   7730 
   7731    VG_(needs_final_IR_tidy_pass)  ( MC_(final_tidy) );
   7732 
   7733 
   7734    VG_(needs_core_errors)         ();
   7735    VG_(needs_tool_errors)         (MC_(eq_Error),
   7736                                    MC_(before_pp_Error),
   7737                                    MC_(pp_Error),
   7738                                    True,/*show TIDs for errors*/
   7739                                    MC_(update_Error_extra),
   7740                                    MC_(is_recognised_suppression),
   7741                                    MC_(read_extra_suppression_info),
   7742                                    MC_(error_matches_suppression),
   7743                                    MC_(get_error_name),
   7744                                    MC_(get_extra_suppression_info),
   7745                                    MC_(print_extra_suppression_use),
   7746                                    MC_(update_extra_suppression_use));
   7747    VG_(needs_libc_freeres)        ();
   7748    VG_(needs_command_line_options)(mc_process_cmd_line_options,
   7749                                    mc_print_usage,
   7750                                    mc_print_debug_usage);
   7751    VG_(needs_client_requests)     (mc_handle_client_request);
   7752    VG_(needs_sanity_checks)       (mc_cheap_sanity_check,
   7753                                    mc_expensive_sanity_check);
   7754    VG_(needs_print_stats)         (mc_print_stats);
   7755    VG_(needs_info_location)       (MC_(pp_describe_addr));
   7756    VG_(needs_malloc_replacement)  (MC_(malloc),
   7757                                    MC_(__builtin_new),
   7758                                    MC_(__builtin_vec_new),
   7759                                    MC_(memalign),
   7760                                    MC_(calloc),
   7761                                    MC_(free),
   7762                                    MC_(__builtin_delete),
   7763                                    MC_(__builtin_vec_delete),
   7764                                    MC_(realloc),
   7765                                    MC_(malloc_usable_size),
   7766                                    MC_MALLOC_DEFAULT_REDZONE_SZB );
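            /* The redzone size passed just above is only a request; the core
               may substitute a different value, so record the size actually
               in effect. */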
   7767    MC_(Malloc_Redzone_SzB) = VG_(malloc_effective_client_redzone_size)();
   7768 
   7769    VG_(needs_xml_output)          ();
   7770 
   7771    VG_(track_new_mem_startup)     ( mc_new_mem_startup );
   7772 
   7773    // Handling of mmap and mprotect isn't simple (well, it is simple,
    7774    // but the justification isn't).  See comments above, just prior to
   7775    // mc_new_mem_mmap.
   7776    VG_(track_new_mem_mmap)        ( mc_new_mem_mmap );
   7777    VG_(track_change_mem_mprotect) ( mc_new_mem_mprotect );
   7778 
   7779    VG_(track_copy_mem_remap)      ( MC_(copy_address_range_state) );
   7780 
   7781    VG_(track_die_mem_stack_signal)( MC_(make_mem_noaccess) );
   7782    VG_(track_die_mem_brk)         ( MC_(make_mem_noaccess) );
   7783    VG_(track_die_mem_munmap)      ( MC_(make_mem_noaccess) );
   7784 
   7785    /* Defer the specification of the new_mem_stack functions to the
   7786       post_clo_init function, since we need to first parse the command
   7787       line before deciding which set to use. */
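            /* (Concretely, mc_post_clo_init is expected to pick either the
               plain new_mem_stack handlers or the origin-tracking *_w_ECU
               variants, depending on whether origin tracking was enabled.) */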
   7788 
   7789 #  ifdef PERF_FAST_STACK
   7790    VG_(track_die_mem_stack_4)     ( mc_die_mem_stack_4   );
   7791    VG_(track_die_mem_stack_8)     ( mc_die_mem_stack_8   );
   7792    VG_(track_die_mem_stack_12)    ( mc_die_mem_stack_12  );
   7793    VG_(track_die_mem_stack_16)    ( mc_die_mem_stack_16  );
   7794    VG_(track_die_mem_stack_32)    ( mc_die_mem_stack_32  );
   7795    VG_(track_die_mem_stack_112)   ( mc_die_mem_stack_112 );
   7796    VG_(track_die_mem_stack_128)   ( mc_die_mem_stack_128 );
   7797    VG_(track_die_mem_stack_144)   ( mc_die_mem_stack_144 );
   7798    VG_(track_die_mem_stack_160)   ( mc_die_mem_stack_160 );
   7799 #  endif
   7800    VG_(track_die_mem_stack)       ( mc_die_mem_stack     );
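            /* The size-specific handlers above cover the most common stack
               adjustments; the generic handler registered here catches every
               other size. */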
   7801 
   7802    VG_(track_ban_mem_stack)       ( MC_(make_mem_noaccess) );
   7803 
   7804    VG_(track_pre_mem_read)        ( check_mem_is_defined );
   7805    VG_(track_pre_mem_read_asciiz) ( check_mem_is_defined_asciiz );
   7806    VG_(track_pre_mem_write)       ( check_mem_is_addressable );
   7807    VG_(track_post_mem_write)      ( mc_post_mem_write );
   7808 
   7809    VG_(track_post_reg_write)                  ( mc_post_reg_write );
   7810    VG_(track_post_reg_write_clientcall_return)( mc_post_reg_write_clientcall );
   7811 
   7812    if (MC_(clo_mc_level) >= 2) {
   7813       VG_(track_copy_mem_to_reg)  ( mc_copy_mem_to_reg );
   7814       VG_(track_copy_reg_to_mem)  ( mc_copy_reg_to_mem );
   7815    }
   7816 
   7817    VG_(needs_watchpoint)          ( mc_mark_unaddressable_for_watchpoint );
   7818 
   7819    init_shadow_memory();
   7820    // MC_(chunk_poolalloc) must be allocated in post_clo_init
   7821    tl_assert(MC_(chunk_poolalloc) == NULL);
   7822    MC_(malloc_list)  = VG_(HT_construct)( "MC_(malloc_list)" );
   7823    MC_(mempool_list) = VG_(HT_construct)( "MC_(mempool_list)" );
   7824    init_prof_mem();
   7825 
   7826    tl_assert( mc_expensive_sanity_check() );
   7827 
   7828    // {LOADV,STOREV}[8421] will all fail horribly if this isn't true.
   7829    tl_assert(sizeof(UWord) == sizeof(Addr));
   7830    // Call me paranoid.  I don't care.
   7831    tl_assert(sizeof(void*) == sizeof(Addr));
   7832 
   7833    // BYTES_PER_SEC_VBIT_NODE must be a power of two.
   7834    tl_assert(-1 != VG_(log2)(BYTES_PER_SEC_VBIT_NODE));
   7835 
   7836    /* This is small.  Always initialise it. */
   7837    init_nia_to_ecu_cache();
   7838 
   7839    /* We can't initialise ocacheL1/ocacheL2 yet, since we don't know
   7840       if we need to, since the command line args haven't been
   7841       processed yet.  Hence defer it to mc_post_clo_init. */
   7842    tl_assert(ocacheL1 == NULL);
   7843    tl_assert(ocacheL2 == NULL);
   7844 
   7845    /* Check some important stuff.  See extensive comments above
   7846       re UNALIGNED_OR_HIGH for background. */
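            /* Informally: MASK(n) sets the low alignment bits (n-1) together
               with every address bit above MAX_PRIMARY_ADDRESS (no such bits
               exist on 32-bit targets, where the primary map covers the whole
               address space), so (addr & MASK(n)) != 0 identifies an access
               that is unaligned or outside the primary map. */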
   7847 #  if VG_WORDSIZE == 4
   7848    tl_assert(sizeof(void*) == 4);
   7849    tl_assert(sizeof(Addr)  == 4);
   7850    tl_assert(sizeof(UWord) == 4);
   7851    tl_assert(sizeof(Word)  == 4);
   7852    tl_assert(MAX_PRIMARY_ADDRESS == 0xFFFFFFFFUL);
   7853    tl_assert(MASK(1) == 0UL);
   7854    tl_assert(MASK(2) == 1UL);
   7855    tl_assert(MASK(4) == 3UL);
   7856    tl_assert(MASK(8) == 7UL);
   7857 #  else
   7858    tl_assert(VG_WORDSIZE == 8);
   7859    tl_assert(sizeof(void*) == 8);
   7860    tl_assert(sizeof(Addr)  == 8);
   7861    tl_assert(sizeof(UWord) == 8);
   7862    tl_assert(sizeof(Word)  == 8);
   7863    tl_assert(MAX_PRIMARY_ADDRESS == 0xFFFFFFFFFULL);
   7864    tl_assert(MASK(1) == 0xFFFFFFF000000000ULL);
   7865    tl_assert(MASK(2) == 0xFFFFFFF000000001ULL);
   7866    tl_assert(MASK(4) == 0xFFFFFFF000000003ULL);
   7867    tl_assert(MASK(8) == 0xFFFFFFF000000007ULL);
   7868 #  endif
   7869 }
   7870 
   7871 VG_DETERMINE_INTERFACE_VERSION(mc_pre_clo_init)
   7872 
   7873 /*--------------------------------------------------------------------*/
   7874 /*--- end                                                mc_main.c ---*/
   7875 /*--------------------------------------------------------------------*/
   7876